aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/ia64/fpu
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2001-02-19 09:09:18 +0000
committerUlrich Drepper <drepper@redhat.com>2001-02-19 09:09:18 +0000
commit8da2915d5dcfa51cb5f9e55f7716b49858c1d59d (patch)
treeaa219472cc41fcb82789b12723628cb6a33cc774 /sysdeps/ia64/fpu
parente208f556cad11f729533385e46e4191fcc49aa0a (diff)
downloadglibc-8da2915d5dcfa51cb5f9e55f7716b49858c1d59d.zip
glibc-8da2915d5dcfa51cb5f9e55f7716b49858c1d59d.tar.gz
glibc-8da2915d5dcfa51cb5f9e55f7716b49858c1d59d.tar.bz2
Update.
2001-02-19 Ulrich Drepper <drepper@redhat.com> * libio/iogetline.c: Move return until after last statement. * localedata/show-ucs-data.c: Don't show < > for better readability. * sysdeps/ia64/fpu/Dist: New file. * sysdeps/ia64/fpu/Makefile: New file. * sysdeps/ia64/fpu/Versions: New file. * sysdeps/ia64/fpu/e_acos.S: New file. * sysdeps/ia64/fpu/e_acosf.S: New file. * sysdeps/ia64/fpu/e_acosl.S: New file. * sysdeps/ia64/fpu/e_asin.S: New file. * sysdeps/ia64/fpu/e_asinf.S: New file. * sysdeps/ia64/fpu/e_asinl.S: New file. * sysdeps/ia64/fpu/e_atan2.S: New file. * sysdeps/ia64/fpu/e_atan2f.S: New file. * sysdeps/ia64/fpu/e_atan2l.c: New file. * sysdeps/ia64/fpu/e_cosh.S: New file. * sysdeps/ia64/fpu/e_coshf.S: New file. * sysdeps/ia64/fpu/e_coshl.S: New file. * sysdeps/ia64/fpu/e_exp.S: New file. * sysdeps/ia64/fpu/e_expf.S: New file. * sysdeps/ia64/fpu/e_expl.c: New file. * sysdeps/ia64/fpu/e_fmod.S: New file. * sysdeps/ia64/fpu/e_fmodf.S: New file. * sysdeps/ia64/fpu/e_fmodl.S: New file. * sysdeps/ia64/fpu/e_hypot.S: New file. * sysdeps/ia64/fpu/e_hypotf.S: New file. * sysdeps/ia64/fpu/e_hypotl.S: New file. * sysdeps/ia64/fpu/e_log.S: New file. * sysdeps/ia64/fpu/e_log10.c: New file. * sysdeps/ia64/fpu/e_log10f.c: New file. * sysdeps/ia64/fpu/e_log10l.c: New file. * sysdeps/ia64/fpu/e_logf.S: New file. * sysdeps/ia64/fpu/e_logl.c: New file. * sysdeps/ia64/fpu/e_pow.S: New file. * sysdeps/ia64/fpu/e_powf.S: New file. * sysdeps/ia64/fpu/e_powl.S: New file. * sysdeps/ia64/fpu/e_rem_pio2.c: New file. * sysdeps/ia64/fpu/e_rem_pio2f.c: New file. * sysdeps/ia64/fpu/e_remainder.S: New file. * sysdeps/ia64/fpu/e_remainderf.S: New file. * sysdeps/ia64/fpu/e_remainderl.S: New file. * sysdeps/ia64/fpu/e_scalb.S: New file. * sysdeps/ia64/fpu/e_scalbf.S: New file. * sysdeps/ia64/fpu/e_scalbl.S: New file. * sysdeps/ia64/fpu/e_sinh.S: New file. * sysdeps/ia64/fpu/e_sinhf.S: New file. * sysdeps/ia64/fpu/e_sinhl.S: New file. * sysdeps/ia64/fpu/e_sqrt.S: New file. 
* sysdeps/ia64/fpu/e_sqrtf.S: New file. * sysdeps/ia64/fpu/e_sqrtl.S: New file. * sysdeps/ia64/fpu/k_rem_pio2.c: New file. * sysdeps/ia64/fpu/k_rem_pio2f.c: New file. * sysdeps/ia64/fpu/k_rem_pio2l.c: New file. * sysdeps/ia64/fpu/libm_atan2_reg.S: New file. * sysdeps/ia64/fpu/libm_error.c: New file. * sysdeps/ia64/fpu/libm_frexp4.S: New file. * sysdeps/ia64/fpu/libm_frexp4f.S: New file. * sysdeps/ia64/fpu/libm_frexp4l.S: New file. * sysdeps/ia64/fpu/libm_reduce.S: New file. * sysdeps/ia64/fpu/libm_support.h: New file. * sysdeps/ia64/fpu/libm_tan.S: New file. * sysdeps/ia64/fpu/s_atan.S: New file. * sysdeps/ia64/fpu/s_atanf.S: New file. * sysdeps/ia64/fpu/s_atanl.S: New file. * sysdeps/ia64/fpu/s_cbrt.S: New file. * sysdeps/ia64/fpu/s_cbrtf.S: New file. * sysdeps/ia64/fpu/s_cbrtl.S: New file. * sysdeps/ia64/fpu/s_ceil.S: New file. * sysdeps/ia64/fpu/s_ceilf.S: New file. * sysdeps/ia64/fpu/s_ceill.S: New file. * sysdeps/ia64/fpu/s_cos.S: New file. * sysdeps/ia64/fpu/s_cosf.S: New file. * sysdeps/ia64/fpu/s_cosl.S: New file. * sysdeps/ia64/fpu/s_expm1.S: New file. * sysdeps/ia64/fpu/s_expm1f.S: New file. * sysdeps/ia64/fpu/s_expm1l.S: New file. * sysdeps/ia64/fpu/s_floor.S: New file. * sysdeps/ia64/fpu/s_floorf.S: New file. * sysdeps/ia64/fpu/s_floorl.S: New file. * sysdeps/ia64/fpu/s_frexp.c: New file. * sysdeps/ia64/fpu/s_frexpf.c: New file. * sysdeps/ia64/fpu/s_frexpl.c: New file. * sysdeps/ia64/fpu/s_ilogb.S: New file. * sysdeps/ia64/fpu/s_ilogbf.S: New file. * sysdeps/ia64/fpu/s_ilogbl.S: New file. * sysdeps/ia64/fpu/s_ldexp.S: New file. * sysdeps/ia64/fpu/s_ldexpf.S: New file. * sysdeps/ia64/fpu/s_ldexpl.S: New file. * sysdeps/ia64/fpu/s_log1p.S: New file. * sysdeps/ia64/fpu/s_log1pf.S: New file. * sysdeps/ia64/fpu/s_log1pl.S: New file. * sysdeps/ia64/fpu/s_logb.S: New file. * sysdeps/ia64/fpu/s_logbf.S: New file. * sysdeps/ia64/fpu/s_logbl.S: New file. * sysdeps/ia64/fpu/s_matherrf.c: New file. * sysdeps/ia64/fpu/s_matherrl.c: New file. 
* sysdeps/ia64/fpu/s_modf.S: New file. * sysdeps/ia64/fpu/s_modff.S: New file. * sysdeps/ia64/fpu/s_modfl.S: New file. * sysdeps/ia64/fpu/s_nearbyint.S: New file. * sysdeps/ia64/fpu/s_nearbyintf.S: New file. * sysdeps/ia64/fpu/s_nearbyintl.S: New file. * sysdeps/ia64/fpu/s_rint.S: New file. * sysdeps/ia64/fpu/s_rintf.S: New file. * sysdeps/ia64/fpu/s_rintl.S: New file. * sysdeps/ia64/fpu/s_round.S: New file. * sysdeps/ia64/fpu/s_roundf.S: New file. * sysdeps/ia64/fpu/s_roundl.S: New file. * sysdeps/ia64/fpu/s_scalbn.S: New file. * sysdeps/ia64/fpu/s_scalbnf.S: New file. * sysdeps/ia64/fpu/s_scalbnl.S: New file. * sysdeps/ia64/fpu/s_significand.S: New file. * sysdeps/ia64/fpu/s_significandf.S: New file. * sysdeps/ia64/fpu/s_significandl.S: New file. * sysdeps/ia64/fpu/s_sin.c: New file. * sysdeps/ia64/fpu/s_sincos.c: New file. * sysdeps/ia64/fpu/s_sincosf.c: New file. * sysdeps/ia64/fpu/s_sincosl.c: New file. * sysdeps/ia64/fpu/s_sinf.c: New file. * sysdeps/ia64/fpu/s_sinl.c: New file. * sysdeps/ia64/fpu/s_tan.S: New file. * sysdeps/ia64/fpu/s_tanf.S: New file. * sysdeps/ia64/fpu/s_tanl.S: New file. * sysdeps/ia64/fpu/s_trunc.S: New file. * sysdeps/ia64/fpu/s_truncf.S: New file. * sysdeps/ia64/fpu/s_truncl.S: New file. * sysdeps/ia64/fpu/w_acos.c: New file. * sysdeps/ia64/fpu/w_acosf.c: New file. * sysdeps/ia64/fpu/w_acosl.c: New file. * sysdeps/ia64/fpu/w_asin.c: New file. * sysdeps/ia64/fpu/w_asinf.c: New file. * sysdeps/ia64/fpu/w_asinl.c: New file. * sysdeps/ia64/fpu/w_atan2.c: New file. * sysdeps/ia64/fpu/w_atan2f.c: New file. * sysdeps/ia64/fpu/w_atan2l.c: New file. * sysdeps/ia64/fpu/w_cosh.c: New file. * sysdeps/ia64/fpu/w_coshf.c: New file. * sysdeps/ia64/fpu/w_coshl.c: New file. * sysdeps/ia64/fpu/w_exp.c: New file. * sysdeps/ia64/fpu/w_expf.c: New file. * sysdeps/ia64/fpu/w_fmod.c: New file. * sysdeps/ia64/fpu/w_fmodf.c: New file. * sysdeps/ia64/fpu/w_fmodl.c: New file. * sysdeps/ia64/fpu/w_hypot.c: New file. * sysdeps/ia64/fpu/w_hypotf.c: New file. 
* sysdeps/ia64/fpu/w_hypotl.c: New file. * sysdeps/ia64/fpu/w_log.c: New file. * sysdeps/ia64/fpu/w_log10.c: New file. * sysdeps/ia64/fpu/w_log10f.c: New file. * sysdeps/ia64/fpu/w_log10l.c: New file. * sysdeps/ia64/fpu/w_logf.c: New file. * sysdeps/ia64/fpu/w_logl.c: New file. * sysdeps/ia64/fpu/w_pow.c: New file. * sysdeps/ia64/fpu/w_powf.c: New file. * sysdeps/ia64/fpu/w_powl.c: New file. * sysdeps/ia64/fpu/w_remainder.c: New file. * sysdeps/ia64/fpu/w_remainderf.c: New file. * sysdeps/ia64/fpu/w_remainderl.c: New file. * sysdeps/ia64/fpu/w_scalb.c: New file. * sysdeps/ia64/fpu/w_scalbf.c: New file. * sysdeps/ia64/fpu/w_scalbl.c: New file. * sysdeps/ia64/fpu/w_sqrt.c: New file. * sysdeps/ia64/fpu/w_sqrtf.c: New file. * sysdeps/ia64/fpu/w_sqrtl.c: New file. * sysdeps/ia64/fpu/libm-test-ulps: Adjust for long double implementation. * sysdeps/ia64/fpu/bits/mathdef.h: Correct float_t and double_t types. Change FP_ILOGBNAN for new implementation. * Versions.def: Add 2.2.3 versions.
Diffstat (limited to 'sysdeps/ia64/fpu')
-rw-r--r--sysdeps/ia64/fpu/Dist6
-rw-r--r--sysdeps/ia64/fpu/Makefile7
-rw-r--r--sysdeps/ia64/fpu/Versions10
-rw-r--r--sysdeps/ia64/fpu/bits/mathdef.h17
-rw-r--r--sysdeps/ia64/fpu/e_acos.S904
-rw-r--r--sysdeps/ia64/fpu/e_acosf.S693
-rw-r--r--sysdeps/ia64/fpu/e_acosl.S1094
-rw-r--r--sysdeps/ia64/fpu/e_asin.S884
-rw-r--r--sysdeps/ia64/fpu/e_asinf.S674
-rw-r--r--sysdeps/ia64/fpu/e_asinl.S777
-rw-r--r--sysdeps/ia64/fpu/e_atan2.S1124
-rw-r--r--sysdeps/ia64/fpu/e_atan2f.S907
-rw-r--r--sysdeps/ia64/fpu/e_atan2l.c1
-rw-r--r--sysdeps/ia64/fpu/e_cosh.S1142
-rw-r--r--sysdeps/ia64/fpu/e_coshf.S1133
-rw-r--r--sysdeps/ia64/fpu/e_coshl.S1150
-rw-r--r--sysdeps/ia64/fpu/e_exp.S815
-rw-r--r--sysdeps/ia64/fpu/e_expf.S768
-rw-r--r--sysdeps/ia64/fpu/e_expl.c1
-rw-r--r--sysdeps/ia64/fpu/e_fmod.S538
-rw-r--r--sysdeps/ia64/fpu/e_fmodf.S553
-rw-r--r--sysdeps/ia64/fpu/e_fmodl.S577
-rw-r--r--sysdeps/ia64/fpu/e_hypot.S438
-rw-r--r--sysdeps/ia64/fpu/e_hypotf.S394
-rw-r--r--sysdeps/ia64/fpu/e_hypotl.S478
-rw-r--r--sysdeps/ia64/fpu/e_log.S1091
-rw-r--r--sysdeps/ia64/fpu/e_log10.c1
-rw-r--r--sysdeps/ia64/fpu/e_log10f.c1
-rw-r--r--sysdeps/ia64/fpu/e_log10l.c1
-rw-r--r--sysdeps/ia64/fpu/e_logf.S946
-rw-r--r--sysdeps/ia64/fpu/e_logl.c1
-rw-r--r--sysdeps/ia64/fpu/e_pow.S2309
-rw-r--r--sysdeps/ia64/fpu/e_powf.S2309
-rw-r--r--sysdeps/ia64/fpu/e_powl.S3437
-rw-r--r--sysdeps/ia64/fpu/e_rem_pio2.c1
-rw-r--r--sysdeps/ia64/fpu/e_rem_pio2f.c1
-rw-r--r--sysdeps/ia64/fpu/e_remainder.S592
-rw-r--r--sysdeps/ia64/fpu/e_remainderf.S611
-rw-r--r--sysdeps/ia64/fpu/e_remainderl.S619
-rw-r--r--sysdeps/ia64/fpu/e_scalb.S551
-rw-r--r--sysdeps/ia64/fpu/e_scalbf.S551
-rw-r--r--sysdeps/ia64/fpu/e_scalbl.S551
-rw-r--r--sysdeps/ia64/fpu/e_sinh.S1310
-rw-r--r--sysdeps/ia64/fpu/e_sinhf.S1311
-rw-r--r--sysdeps/ia64/fpu/e_sinhl.S1311
-rw-r--r--sysdeps/ia64/fpu/e_sqrt.S347
-rw-r--r--sysdeps/ia64/fpu/e_sqrtf.S266
-rw-r--r--sysdeps/ia64/fpu/e_sqrtl.S281
-rw-r--r--sysdeps/ia64/fpu/k_rem_pio2.c1
-rw-r--r--sysdeps/ia64/fpu/k_rem_pio2f.c1
-rw-r--r--sysdeps/ia64/fpu/k_rem_pio2l.c1
-rw-r--r--sysdeps/ia64/fpu/libm-test-ulps130
-rw-r--r--sysdeps/ia64/fpu/libm_atan2_reg.S1221
-rw-r--r--sysdeps/ia64/fpu/libm_error.c3545
-rw-r--r--sysdeps/ia64/fpu/libm_frexp4.S185
-rw-r--r--sysdeps/ia64/fpu/libm_frexp4f.S185
-rw-r--r--sysdeps/ia64/fpu/libm_frexp4l.S184
-rw-r--r--sysdeps/ia64/fpu/libm_reduce.S1527
-rw-r--r--sysdeps/ia64/fpu/libm_support.h339
-rw-r--r--sysdeps/ia64/fpu/libm_tan.S3319
-rw-r--r--sysdeps/ia64/fpu/s_atan.S953
-rw-r--r--sysdeps/ia64/fpu/s_atanf.S543
-rw-r--r--sysdeps/ia64/fpu/s_atanl.S1994
-rw-r--r--sysdeps/ia64/fpu/s_cbrt.S676
-rw-r--r--sysdeps/ia64/fpu/s_cbrtf.S655
-rw-r--r--sysdeps/ia64/fpu/s_cbrtl.S889
-rw-r--r--sysdeps/ia64/fpu/s_ceil.S249
-rw-r--r--sysdeps/ia64/fpu/s_ceilf.S249
-rw-r--r--sysdeps/ia64/fpu/s_ceill.S249
-rw-r--r--sysdeps/ia64/fpu/s_cos.S3488
-rw-r--r--sysdeps/ia64/fpu/s_cosf.S686
-rw-r--r--sysdeps/ia64/fpu/s_cosl.S2506
-rw-r--r--sysdeps/ia64/fpu/s_expm1.S1755
-rw-r--r--sysdeps/ia64/fpu/s_expm1f.S1742
-rw-r--r--sysdeps/ia64/fpu/s_expm1l.S1603
-rw-r--r--sysdeps/ia64/fpu/s_floor.S227
-rw-r--r--sysdeps/ia64/fpu/s_floorf.S224
-rw-r--r--sysdeps/ia64/fpu/s_floorl.S224
-rw-r--r--sysdeps/ia64/fpu/s_frexp.c44
-rw-r--r--sysdeps/ia64/fpu/s_frexpf.c44
-rw-r--r--sysdeps/ia64/fpu/s_frexpl.c44
-rw-r--r--sysdeps/ia64/fpu/s_ilogb.S240
-rw-r--r--sysdeps/ia64/fpu/s_ilogbf.S240
-rw-r--r--sysdeps/ia64/fpu/s_ilogbl.S240
-rw-r--r--sysdeps/ia64/fpu/s_ldexp.S367
-rw-r--r--sysdeps/ia64/fpu/s_ldexpf.S366
-rw-r--r--sysdeps/ia64/fpu/s_ldexpl.S366
-rw-r--r--sysdeps/ia64/fpu/s_log1p.S1614
-rw-r--r--sysdeps/ia64/fpu/s_log1pf.S1616
-rw-r--r--sysdeps/ia64/fpu/s_log1pl.S1663
-rw-r--r--sysdeps/ia64/fpu/s_logb.S314
-rw-r--r--sysdeps/ia64/fpu/s_logbf.S301
-rw-r--r--sysdeps/ia64/fpu/s_logbl.S286
-rw-r--r--sysdeps/ia64/fpu/s_matherrf.c33
-rw-r--r--sysdeps/ia64/fpu/s_matherrl.c33
-rw-r--r--sysdeps/ia64/fpu/s_modf.S272
-rw-r--r--sysdeps/ia64/fpu/s_modff.S272
-rw-r--r--sysdeps/ia64/fpu/s_modfl.S267
-rw-r--r--sysdeps/ia64/fpu/s_nearbyint.S221
-rw-r--r--sysdeps/ia64/fpu/s_nearbyintf.S221
-rw-r--r--sysdeps/ia64/fpu/s_nearbyintl.S218
-rw-r--r--sysdeps/ia64/fpu/s_rint.S241
-rw-r--r--sysdeps/ia64/fpu/s_rintf.S241
-rw-r--r--sysdeps/ia64/fpu/s_rintl.S239
-rw-r--r--sysdeps/ia64/fpu/s_round.S236
-rw-r--r--sysdeps/ia64/fpu/s_roundf.S236
-rw-r--r--sysdeps/ia64/fpu/s_roundl.S236
-rw-r--r--sysdeps/ia64/fpu/s_scalbn.S366
-rw-r--r--sysdeps/ia64/fpu/s_scalbnf.S366
-rw-r--r--sysdeps/ia64/fpu/s_scalbnl.S366
-rw-r--r--sysdeps/ia64/fpu/s_significand.S147
-rw-r--r--sysdeps/ia64/fpu/s_significandf.S146
-rw-r--r--sysdeps/ia64/fpu/s_significandl.S147
-rw-r--r--sysdeps/ia64/fpu/s_sin.c1
-rw-r--r--sysdeps/ia64/fpu/s_sincos.c9
-rw-r--r--sysdeps/ia64/fpu/s_sincosf.c9
-rw-r--r--sysdeps/ia64/fpu/s_sincosl.c9
-rw-r--r--sysdeps/ia64/fpu/s_sinf.c1
-rw-r--r--sysdeps/ia64/fpu/s_sinl.c1
-rw-r--r--sysdeps/ia64/fpu/s_tan.S757
-rw-r--r--sysdeps/ia64/fpu/s_tanf.S757
-rw-r--r--sysdeps/ia64/fpu/s_tanl.S3057
-rw-r--r--sysdeps/ia64/fpu/s_trunc.S188
-rw-r--r--sysdeps/ia64/fpu/s_truncf.S188
-rw-r--r--sysdeps/ia64/fpu/s_truncl.S188
-rw-r--r--sysdeps/ia64/fpu/w_acos.c1
-rw-r--r--sysdeps/ia64/fpu/w_acosf.c1
-rw-r--r--sysdeps/ia64/fpu/w_acosl.c1
-rw-r--r--sysdeps/ia64/fpu/w_asin.c1
-rw-r--r--sysdeps/ia64/fpu/w_asinf.c1
-rw-r--r--sysdeps/ia64/fpu/w_asinl.c1
-rw-r--r--sysdeps/ia64/fpu/w_atan2.c1
-rw-r--r--sysdeps/ia64/fpu/w_atan2f.c1
-rw-r--r--sysdeps/ia64/fpu/w_atan2l.c1
-rw-r--r--sysdeps/ia64/fpu/w_cosh.c1
-rw-r--r--sysdeps/ia64/fpu/w_coshf.c1
-rw-r--r--sysdeps/ia64/fpu/w_coshl.c1
-rw-r--r--sysdeps/ia64/fpu/w_exp.c1
-rw-r--r--sysdeps/ia64/fpu/w_expf.c1
-rw-r--r--sysdeps/ia64/fpu/w_fmod.c1
-rw-r--r--sysdeps/ia64/fpu/w_fmodf.c1
-rw-r--r--sysdeps/ia64/fpu/w_fmodl.c1
-rw-r--r--sysdeps/ia64/fpu/w_hypot.c1
-rw-r--r--sysdeps/ia64/fpu/w_hypotf.c1
-rw-r--r--sysdeps/ia64/fpu/w_hypotl.c1
-rw-r--r--sysdeps/ia64/fpu/w_log.c1
-rw-r--r--sysdeps/ia64/fpu/w_log10.c1
-rw-r--r--sysdeps/ia64/fpu/w_log10f.c1
-rw-r--r--sysdeps/ia64/fpu/w_log10l.c1
-rw-r--r--sysdeps/ia64/fpu/w_logf.c1
-rw-r--r--sysdeps/ia64/fpu/w_logl.c1
-rw-r--r--sysdeps/ia64/fpu/w_pow.c1
-rw-r--r--sysdeps/ia64/fpu/w_powf.c1
-rw-r--r--sysdeps/ia64/fpu/w_powl.c1
-rw-r--r--sysdeps/ia64/fpu/w_remainder.c1
-rw-r--r--sysdeps/ia64/fpu/w_remainderf.c1
-rw-r--r--sysdeps/ia64/fpu/w_remainderl.c1
-rw-r--r--sysdeps/ia64/fpu/w_scalb.c1
-rw-r--r--sysdeps/ia64/fpu/w_scalbf.c1
-rw-r--r--sysdeps/ia64/fpu/w_scalbl.c1
-rw-r--r--sysdeps/ia64/fpu/w_sqrt.c1
-rw-r--r--sysdeps/ia64/fpu/w_sqrtf.c1
-rw-r--r--sysdeps/ia64/fpu/w_sqrtl.c1
163 files changed, 80389 insertions, 97 deletions
diff --git a/sysdeps/ia64/fpu/Dist b/sysdeps/ia64/fpu/Dist
new file mode 100644
index 0000000..ae51e76
--- /dev/null
+++ b/sysdeps/ia64/fpu/Dist
@@ -0,0 +1,6 @@
+libm_atan2_reg.S
+libm_error.c
+libm_reduce.S
+libm_support.h
+s_matherrf.c
+s_matherrl.c
diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile
new file mode 100644
index 0000000..e5237ff
--- /dev/null
+++ b/sysdeps/ia64/fpu/Makefile
@@ -0,0 +1,7 @@
+ifeq ($(subdir),math)
+libm-sysdep_routines += libm_atan2_reg s_matherrf s_matherrl libm_reduce \
+ libm_tan
+
+routines += libm_frexp4 libm_frexp4f libm_frexp4l libm_error
+CPPFLAGS += -DSIZE_INT_32
+endif
diff --git a/sysdeps/ia64/fpu/Versions b/sysdeps/ia64/fpu/Versions
new file mode 100644
index 0000000..6e46589
--- /dev/null
+++ b/sysdeps/ia64/fpu/Versions
@@ -0,0 +1,10 @@
+libc {
+ GLIBC_2.2.3 {
+ __libm_frexp_4; __libm_frexp_4f; __libm_frexp_4l; __libm_error_support;
+ }
+}
+libm {
+ GLIBC_2.2.3 {
+ matherrf; matherrl;
+ }
+}
diff --git a/sysdeps/ia64/fpu/bits/mathdef.h b/sysdeps/ia64/fpu/bits/mathdef.h
index 90c1e89..ad3b168 100644
--- a/sysdeps/ia64/fpu/bits/mathdef.h
+++ b/sysdeps/ia64/fpu/bits/mathdef.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -23,19 +23,18 @@
#if defined __USE_ISOC99 && defined _MATH_H && !defined _MATH_H_MATHDEF
# define _MATH_H_MATHDEF 1
-/* The ix87 FPUs evaluate all values in the 80 bit floating-point format
- which is also available for the user as `long double'. Therefore we
- define: */
-typedef long double float_t; /* `float' expressions are evaluated as
- `long double'. */
-typedef long double double_t; /* `double' expressions are evaluated as
- `long double'. */
+/* The IA-64 architecture computes values with the precision of the
+ used type. */
+typedef float float_t; /* `float' expressions are evaluated as
+ `float'. */
+typedef double double_t; /* `double' expressions are evaluated as
+ `double'. */
/* Define `INFINITY' as value of type `float'. */
# define INFINITY HUGE_VALF
/* The values returned by `ilogb' for 0 and NaN respectively. */
# define FP_ILOGB0 (-2147483647 - 1)
-# define FP_ILOGBNAN (-2147483647 - 1)
+# define FP_ILOGBNAN 2147483647
#endif /* ISO C99 */
diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S
new file mode 100644
index 0000000..1d8085c
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acos.S
@@ -0,0 +1,904 @@
+.file "acos.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 8/17/00 New and much faster algorithm.
+// 8/30/00 Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
+// fixed mfb split issue stalls.
+
+// Description
+//=========================================
+// The acos function computes the principal value of the arc cosine of x.
+// A domain error occurs for arguments not in the range [-1,+1].
+
+// The acos function returns the arc cosine in the range [0, +pi] radians.
+// acos(1) returns +0, acos(-1) returns pi, acos(0) returns pi/2.
+// acos(x) returns a NaN and raises the invalid exception for |x| > 1.
+
+// The acos function is computed like asin, except that the result is
+
+//
+// Assembly macros
+//=========================================
+
+#include "libm_support.h"
+
+// predicate registers
+//acos_pred_LEsqrt2by2 = p7
+//acos_pred_GTsqrt2by2 = p8
+
+// integer registers
+ASIN_Addr1 = r33
+ASIN_Addr2 = r34
+ASIN_FFFE = r35
+
+GR_SAVE_B0 = r36
+GR_SAVE_PFS = r37
+GR_SAVE_GP = r38
+
+GR_Parameter_X = r39
+GR_Parameter_Y = r40
+GR_Parameter_RESULT = r41
+GR_Parameter_Tag = r42
+
+// floating point registers
+acos_coeff_P1 = f32
+acos_coeff_P2 = f33
+acos_coeff_P3 = f34
+acos_coeff_P4 = f35
+
+acos_coeff_P5 = f36
+acos_coeff_P6 = f37
+acos_coeff_P7 = f38
+acos_coeff_P8 = f39
+acos_coeff_P9 = f40
+
+acos_coeff_P10 = f41
+acos_coeff_P11 = f42
+acos_coeff_P12 = f43
+acos_coeff_P13 = f44
+acos_coeff_P14 = f45
+
+acos_coeff_P15 = f46
+acos_coeff_P16 = f47
+acos_coeff_P17 = f48
+acos_coeff_P18 = f49
+acos_coeff_P19 = f50
+
+acos_coeff_P20 = f51
+acos_coeff_P21 = f52
+acos_const_sqrt2by2 = f53
+acos_const_piby2 = f54
+acos_abs_x = f55
+
+acos_tx = f56
+acos_tx2 = f57
+acos_tx3 = f58
+acos_tx4 = f59
+acos_tx8 = f60
+
+acos_tx11 = f61
+acos_1poly_p8 = f62
+acos_1poly_p19 = f63
+acos_1poly_p4 = f64
+acos_1poly_p15 = f65
+
+acos_1poly_p6 = f66
+acos_1poly_p17 = f67
+acos_1poly_p0 = f68
+acos_1poly_p11 = f69
+acos_1poly_p2 = f70
+
+acos_1poly_p13 = f71
+acos_series_tx = f72
+acos_t = f73
+acos_t2 = f74
+acos_t3 = f75
+
+acos_t4 = f76
+acos_t8 = f77
+acos_t11 = f78
+acos_poly_p8 = f79
+acos_poly_p19 = f80
+
+acos_poly_p4 = f81
+acos_poly_p15 = f82
+acos_poly_p6 = f83
+acos_poly_p17 = f84
+acos_poly_p0 = f85
+
+acos_poly_p11 = f86
+acos_poly_p2 = f87
+acos_poly_p13 = f88
+acos_series_t = f89
+acos_1by2 = f90
+
+acos_3by2 = f91
+acos_5by2 = f92
+acos_11by4 = f93
+acos_35by8 = f94
+acos_63by8 = f95
+
+acos_231by16 = f96
+acos_y0 = f97
+acos_H0 = f98
+acos_S0 = f99
+acos_d = f100
+
+acos_l1 = f101
+acos_d2 = f102
+acos_T0 = f103
+acos_d1 = f104
+acos_e0 = f105
+
+acos_l2 = f106
+acos_d3 = f107
+acos_T3 = f108
+acos_S1 = f109
+acos_e1 = f110
+
+acos_z = f111
+answer2 = f112
+acos_sgn_x = f113
+acos_429by16 = f114
+acos_18by4 = f115
+
+acos_3by4 = f116
+acos_l3 = f117
+acos_T6 = f118
+acos_const_add = f119
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+acos_coeff_1_table:
+ASM_TYPE_DIRECTIVE(acos_coeff_1_table,@object)
+data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
+data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
+data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
+data8 0xF75E381A323D4D94 , 0x0000C002 //P16
+data8 0x8959C2629C1024C0 , 0x0000C002 //P20
+data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
+data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
+data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
+data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
+data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
+data8 0xF534912FF3E7B76F , 0x00003FFF //P21
+data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
+data8 0x0000000000000000 , 0x00000000 // pad to avoid bank conflicts
+ASM_SIZE_DIRECTIVE(acos_coeff_1_table)
+
+
+acos_coeff_2_table:
+ASM_TYPE_DIRECTIVE(acos_coeff_2_table,@object)
+data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
+data8 0xB4F118A4B1015470 , 0x00004003 //P17
+data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
+data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
+data8 0x92728015172CFE1C , 0x00004003 //P19
+data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
+data8 0x999999999952A5C3 , 0x00003FFB //P2
+data8 0x855576BE6F0975EC , 0x00003FFF //P13
+data8 0xF12420E778077D89 , 0x00003FFA //P11
+data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
+data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
+ASM_SIZE_DIRECTIVE(acos_coeff_2_table)
+
+
+.align 32
+.global acos
+ASM_TYPE_DIRECTIVE(acos,@function)
+
+.section .text
+.proc acos
+.align 32
+
+
+acos:  // x arrives in f8; the result is returned in f8
+
+{ .mfi
+ alloc r32 = ar.pfs,1,6,4,0
+ fma.s1 acos_tx = f8,f8,f0  // tx = x^2
+ addl ASIN_Addr2 = @ltoff(acos_coeff_2_table),gp
+}
+{ .mfi
+ mov ASIN_FFFE = 0xFFFE
+ fnma.s1 acos_t = f8,f8,f1  // t = 1 - x^2
+ addl ASIN_Addr1 = @ltoff(acos_coeff_1_table),gp
+}
+;;
+
+
+{ .mfi
+ setf.exp acos_1by2 = ASIN_FFFE  // 1/2, built from the exponent 0xFFFE
+ fmerge.s acos_abs_x = f1,f8  // |x|: sign of f1, magnitude of x
+ nop.i 999 ;;
+}
+
+
+{ .mmf
+ ld8 ASIN_Addr1 = [ASIN_Addr1]  // address of acos_coeff_1_table
+ ld8 ASIN_Addr2 = [ASIN_Addr2]  // address of acos_coeff_2_table
+ fmerge.s acos_sgn_x = f8,f1  // sign of x, magnitude of 1
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fcmp.lt.s1 p11,p12 = f8, f0  // p11: x < 0, p12: otherwise
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfe acos_coeff_P7 = [ASIN_Addr1],16
+ fma.s1 acos_tx2 = acos_tx,acos_tx,f0  // tx^2
+ nop.i 999
+}
+{ .mfi
+ ldfe acos_coeff_P6 = [ASIN_Addr2],16
+ fma.s1 acos_t2 = acos_t,acos_t,f0  // t^2
+ nop.i 999;;
+}
+
+
+{ .mmf
+ ldfe acos_coeff_P18 = [ASIN_Addr1],16
+ ldfe acos_coeff_P17 = [ASIN_Addr2],16
+ fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
+}
+;;
+
+
+{ .mmf
+ ldfe acos_coeff_P5 = [ASIN_Addr1],16
+ ldfe acos_coeff_P4 = [ASIN_Addr2],16
+ frsqrta.s1 acos_y0,p0 = acos_t  // y0 ~ 1/sqrt(t), seed for the sqrt refinement
+}
+;;
+
+
+{ .mfi
+ ldfe acos_coeff_P16 = [ASIN_Addr1],16
+ fcmp.gt.s1 p9,p0 = acos_abs_x,f1  // p9: |x| > 1 (domain error)
+ nop.i 999
+}
+{ .mfb
+ ldfe acos_coeff_P15 = [ASIN_Addr2],16
+(p8) fma.d f8 = f8,f1,f0  // NaN input: result is x*1 + 0
+(p8) br.ret.spnt b0
+}
+;;
+
+
+{ .mmf
+ ldfe acos_coeff_P20 = [ASIN_Addr1],16
+ ldfe acos_coeff_P19 = [ASIN_Addr2],16
+ fclass.m.unc p10,p0 = f8, 0x07 //@zero
+}
+;;
+
+
+{ .mfi
+ ldfe acos_coeff_P9 = [ASIN_Addr1],16
+ fma.s1 acos_t4 = acos_t2,acos_t2,f0  // t^4
+(p9) mov GR_Parameter_Tag = 58  // tag used by the |x| > 1 error path
+}
+{ .mfi
+ ldfe acos_coeff_P8 = [ASIN_Addr2],16
+ fma.s1 acos_3by2 = acos_1by2,f1,f1  // 3/2 = 1/2 + 1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe acos_coeff_P2 = [ASIN_Addr2],16
+ fma.s1 acos_tx4 = acos_tx2,acos_tx2,f0  // tx^4
+ nop.i 999
+}
+{ .mfb
+ ldfe acos_coeff_P3 = [ASIN_Addr1],16
+ fma.s1 acos_t3 = acos_t,acos_t2,f0  // t^3
+(p9) br.cond.spnt __libm_error_region
+}
+;;
+
+
+{ .mfi
+ ldfe acos_coeff_P13 = [ASIN_Addr2],16
+ fma.s1 acos_H0 = acos_y0,acos_1by2,f0  // H0 = y0/2
+ nop.i 999
+}
+{ .mfi
+ ldfe acos_coeff_P14 = [ASIN_Addr1],16
+ fma.s1 acos_S0 = acos_y0,acos_t,f0  // S0 = y0*t ~ sqrt(t)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe acos_coeff_P11 = [ASIN_Addr2],16
+ fcmp.eq.s1 p6,p0 = acos_abs_x, f1  // p6: |x| == 1
+ nop.i 999
+}
+{ .mfi
+ ldfe acos_coeff_P12 = [ASIN_Addr1],16
+ fma.s1 acos_tx3 = acos_tx,acos_tx2,f0  // tx^3
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ ldfe acos_coeff_P10 = [ASIN_Addr2],16
+ fma.s1 acos_1poly_p6 = acos_tx,acos_coeff_P7,acos_coeff_P6  // P6 + P7*tx
+ nop.i 999
+}
+{ .mfi
+ ldfe acos_coeff_P1 = [ASIN_Addr1],16
+ fma.s1 acos_poly_p6 = acos_t,acos_coeff_P7,acos_coeff_P6  // P6 + P7*t
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe acos_const_sqrt2by2 = [ASIN_Addr2],16
+ fma.s1 acos_5by2 = acos_3by2,f1,f1  // 5/2 = 3/2 + 1
+ nop.i 999
+}
+{ .mfi
+ ldfe acos_coeff_P21 = [ASIN_Addr1],16
+ fma.s1 acos_11by4 = acos_3by2,acos_3by2,acos_1by2  // 11/4 = 9/4 + 1/2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe acos_const_piby2 = [ASIN_Addr1],16
+ fma.s1 acos_poly_p17 = acos_t,acos_coeff_P18,acos_coeff_P17  // P17 + P18*t
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 acos_3by4 = acos_3by2,acos_1by2,f0  // 3/4
+(p10) br.cond.spnt L(ACOS_ZERO) // Branch to short path if x=0
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p15 = acos_t,acos_coeff_P16,acos_coeff_P15  // P15 + P16*t
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 acos_d = acos_S0,acos_H0,acos_1by2  // d = 1/2 - S0*H0
+(p6) br.cond.spnt L(ACOS_ABS_ONE) // Branch to short path if |x|=1
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p19 = acos_t,acos_coeff_P20,acos_coeff_P19  // P19 + P20*t
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p4 = acos_t,acos_coeff_P5,acos_coeff_P4  // P4 + P5*t
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p17 = acos_tx,acos_coeff_P18,acos_coeff_P17  // P17 + P18*tx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p8 = acos_t,acos_coeff_P9,acos_coeff_P8  // P8 + P9*t
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fms.s1 acos_35by8 = acos_5by2,acos_11by4,acos_5by2  // 35/8 = 5/2*11/4 - 5/2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_63by8 = acos_5by2,acos_11by4,f1  // 63/8 = 5/2*11/4 + 1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p13 = acos_t,acos_coeff_P14,acos_coeff_P13  // P13 + P14*t
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_18by4 = acos_3by2,acos_5by2,acos_3by4  // 18/4 = 3/2*5/2 + 3/4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_l1 = acos_5by2,acos_d,acos_3by2  // l1 = 3/2 + 5/2*d
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_d2 = acos_d,acos_d,f0  // d^2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p15 = acos_t2,acos_poly_p17,acos_poly_p15
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_T0 = acos_d,acos_S0,f0  // T0 = d*S0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p19 = acos_t2,acos_coeff_P21,acos_poly_p19
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p4 = acos_t2,acos_poly_p6,acos_poly_p4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_d1 = acos_35by8,acos_d,f0  // d1 = 35/8*d
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_231by16 = acos_3by2,acos_35by8,acos_63by8  // 231/16 = 3/2*35/8 + 63/8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p2 = acos_t,acos_coeff_P3,acos_coeff_P2  // P2 + P3*t
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p8 = acos_t2,acos_coeff_P10,acos_poly_p8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p11 = acos_t,acos_coeff_P12,acos_coeff_P11  // P11 + P12*t
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_e0 = acos_d2,acos_l1,acos_d  // e0 = d + d^2*l1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p15 = acos_tx,acos_coeff_P16,acos_coeff_P15  // P15 + P16*tx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p0 = acos_t,acos_coeff_P1,f1  // 1 + P1*t
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p19 = acos_tx,acos_coeff_P20,acos_coeff_P19  // P19 + P20*tx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p4 = acos_tx,acos_coeff_P5,acos_coeff_P4  // P4 + P5*tx
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p8 = acos_tx,acos_coeff_P9,acos_coeff_P8  // P8 + P9*tx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_l2 = acos_231by16,acos_d,acos_63by8  // l2 = 63/8 + 231/16*d
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_d3 = acos_d2,acos_d,f0  // d^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_T3 = acos_d2,acos_T0,f0  // T3 = d^3*S0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_429by16 = acos_18by4,acos_11by4,acos_231by16  // 429/16 = 18/4*11/4 + 231/16
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_S1 = acos_e0,acos_S0,acos_S0  // S1 = S0*(1 + e0)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p4 = acos_t4,acos_poly_p8,acos_poly_p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p15 = acos_t4,acos_poly_p19,acos_poly_p15
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p0 = acos_t2,acos_poly_p2,acos_poly_p0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p11 = acos_t2,acos_poly_p13,acos_poly_p11
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_t8 = acos_t4,acos_t4,f0  // t^8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_e1 = acos_d2,acos_l2,acos_d1  // e1 = d1 + d^2*l2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p4 = acos_tx2,acos_1poly_p6,acos_1poly_p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p15 = acos_tx2,acos_1poly_p17,acos_1poly_p15
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p8 = acos_tx2,acos_coeff_P10,acos_1poly_p8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p19 = acos_tx2,acos_coeff_P21,acos_1poly_p19
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p2 = acos_tx,acos_coeff_P3,acos_coeff_P2  // P2 + P3*tx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p13 = acos_tx,acos_coeff_P14,acos_coeff_P13  // P13 + P14*tx
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p0 = acos_tx,acos_coeff_P1,f1  // 1 + P1*tx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p11 = acos_tx,acos_coeff_P12,acos_coeff_P11  // P11 + P12*tx
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_l3 = acos_429by16,acos_d,f0  // l3 = 429/16*d
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_z = acos_e1,acos_T3,acos_S1  // z = S1 + e1*T3
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p11 = acos_t4,acos_poly_p15,acos_poly_p11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_T6 = acos_T3,acos_d3,f0  // T6 = d^6*S0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_t11 = acos_t8,acos_t3,f0  // t^11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_poly_p0 = acos_t4,acos_poly_p4,acos_poly_p0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p4 = acos_tx4,acos_1poly_p8,acos_1poly_p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p15 = acos_tx4,acos_1poly_p19,acos_1poly_p15
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p0 = acos_tx2,acos_1poly_p2,acos_1poly_p0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p11 = acos_tx2,acos_1poly_p13,acos_1poly_p11
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+// fcmp.le.s1 acos_pred_LEsqrt2by2,acos_pred_GTsqrt2by2 = acos_abs_x,acos_const_sqrt2by2
+ fcmp.le.s1 p7,p8 = acos_abs_x,acos_const_sqrt2by2  // p7: |x| <= sqrt(2)/2, p8: otherwise
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_tx8 = acos_tx4,acos_tx4,f0  // tx^8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_z = acos_l3,acos_T6,acos_z  // z += l3*T6; z now approximates sqrt(t)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_series_t = acos_t11,acos_poly_p11,acos_poly_p0  // 1 + P1*t + ... + P21*t^21
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 acos_const_add = acos_const_piby2, f1, acos_const_piby2  // x < 0: additive constant is pi
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 acos_const_add = f1,f0,f0  // x >= 0: additive constant is 0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p0 = acos_tx4,acos_1poly_p4,acos_1poly_p0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acos_1poly_p11 = acos_tx4,acos_1poly_p15,acos_1poly_p11
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_tx11 = acos_tx8,acos_tx3,f0  // tx^11
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+//(acos_pred_GTsqrt2by2) fnma.s1 answer2 = acos_z,acos_series_t,acos_const_piby2
+(p8) fnma.s1 answer2 = acos_z,acos_series_t,f0  // answer2 = -(z * series_t)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acos_series_tx = acos_tx11,acos_1poly_p11,acos_1poly_p0  // 1 + P1*tx + ... + P21*tx^21
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+//(acos_pred_GTsqrt2by2) fnma.d f8 = acos_sgn_x,answer2,acos_const_piby2
+(p8) fnma.d f8 = acos_sgn_x,answer2,acos_const_add  // |x| > sqrt(2)/2: const_add - sgn_x*answer2
+ nop.i 999;;
+}
+
+{ .mfb
+ nop.m 999
+//(acos_pred_LEsqrt2by2) fnma.d f8 = f8,acos_series_tx,acos_const_piby2
+(p7) fnma.d f8 = f8,acos_series_tx,acos_const_piby2  // |x| <= sqrt(2)/2: pi/2 - x*series_tx
+ br.ret.sptk b0 ;;
+}
+
+
+L(ACOS_ZERO):
+// Here if x=0
+{ .mfb
+ nop.m 999
+ fma.d f8 = acos_const_piby2,f1,f0  // acos(0)=pi/2
+ br.ret.sptk b0 ;;
+}
+
+
+L(ACOS_ABS_ONE):
+.pred.rel "mutex",p11,p12
+// Here if |x|=1
+{ .mfi
+ nop.m 999
+(p11) fma.d f8 = acos_const_piby2,f1,acos_const_piby2 // acos(-1)=pi
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p12) fma.d f8 = f1,f0,f0 // acos(1)=0
+ br.ret.sptk b0 ;;
+}
+
+
+.endp acos
+ASM_SIZE_DIRECTIVE(acos)
+
+.proc __libm_error_region
+__libm_error_region:  // reached from acos when |x| > 1, with x in f8 and GR_Parameter_Tag already set
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32, i.e. new sp + 32)
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 (constant f1) on stack, then advance to sp + 48; NOTE(review): single-precision store in the double routine - confirm parameter 2 is never read as a double
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+ frcpa.s0 f9,p0 = f0,f0  // f9 = reciprocal approximation for 0/0; presumably a NaN with Invalid raised - confirm
+;;
+
+{ .mib
+ stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack
+ adds r32 = 48,sp // Keep the Parameter 3 (result) address in r32 across the call
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ ldfd f8 = [r32] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support,@function
+.global __libm_error_support
diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S
new file mode 100644
index 0000000..5df3afc
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acosf.S
@@ -0,0 +1,693 @@
+.file "acosf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+// History
+//==============================================================
+// 2/02/00 Initial revision
+// 6/28/00 Improved speed
+// 6/31/00 Changed register allocation because of some duplicate macros
+// moved nan exit bundle up to gain a cycle.
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 8/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
+
+
+// Description
+//=========================================
+// The acosf function computes the principal value of the arc cosine of x.
+// A domain error occurs for arguments not in the range [-1,+1].
+
+// The acosf function returns the arc cosine in the range [0, +pi] radians.
+// acos(1) returns +0
+// acos(x) returns a Nan and raises the invalid exception for |x| >1
+
+// |x| <= sqrt(2)/2. get Ax and Bx
+
+// poly_p1 = x p1
+// poly_p3 = x2 p4 + p3
+// poly_p1 = x2 (poly_p1) + x = x2(x p1) + x
+// poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2
+
+// poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x
+// = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+
+// poly_p7 = x2 p8 + p7
+// poly_p5 = x2 p6 + p5
+
+// poly_p7 = x4 p9 + (x2 p8 + p7)
+// poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5
+
+// sinf1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+// = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x
+// answer1 = pi/2 - sinf1
+
+
+
+// |x| > sqrt(2)/2
+
+// Get z = sqrt(1-x2)
+
+// Get polynomial in t = 1-x2
+
+// t2 = t t
+// t4 = t2 t2
+
+// poly_p4 = t p5 + p4
+// poly_p1 = t p1 + 1
+
+// poly_p6 = t p7 + p6
+// poly_p2 = t p3 + p2
+
+// poly_p8 = t p9 + p8
+
+// poly_p4 = t2 poly_p6 + poly_p4
+// = t2 (t p7 + p6) + (t p5 + p4)
+
+// poly_p2 = t2 poly_p2 + poly_p1
+// = t2 (t p3 + p2) + (t p1 + 1)
+
+// poly_p4 = t4 poly_p8 + poly_p4
+// = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))
+
+// P(t) = poly_p2 + t4 poly_p4
+// = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)))
+// = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4
+
+
+// answer2 = sign(x) z P(t) if x>0
+// = sign(x) z P(t) + pi if x<0
+
+#include "libm_support.h"
+
+//
+// Assembly macros
+//=========================================
+
+// predicate registers (used by number in the code; p7: |x| <= sqrt(2)/2, p8: |x| > sqrt(2)/2)
+//acosf_pred_LEsqrt2by2 = p7
+//acosf_pred_GTsqrt2by2 = p8
+
+// integer registers (acosf allocates r33-r40 as locals and r41-r44 as outputs)
+ACOSF_Addr1 = r33 // address of acosf_coeff_1_table
+ACOSF_Addr2 = r34 // address of acosf_coeff_2_table
+ACOSF_GR_1by2 = r35 // 0x3f000000, moved to acosf_1by2 with setf.s
+
+ACOSF_GR_3by2 = r36 // 0x3fc00000, moved to acosf_3by2 with setf.s
+ACOSF_GR_5by2 = r37 // 0x40200000, moved to acosf_5by2 with setf.s
+
+GR_SAVE_B0 = r38 // b0, ar.pfs and gp are saved here by __libm_error_region
+GR_SAVE_PFS = r39
+GR_SAVE_GP = r40
+
+GR_Parameter_X = r41 // stack addresses and tag set up for __libm_error_support
+GR_Parameter_Y = r42
+GR_Parameter_RESULT = r43
+GR_Parameter_TAG = r44 // set to 59 when |x| > 1
+
+// floating point registers
+
+acosf_y = f32
+acosf_abs_x = f33 // |x|
+acosf_x2 = f34 // NOTE(review): acosf_x2 is defined again as f48 below; presumably the later definition wins -- confirm
+acosf_sgn_x = f35 // 1.0 carrying the sign of x
+
+acosf_1by2 = f36
+acosf_3by2 = f37
+acosf_5by2 = f38
+acosf_coeff_P3 = f39
+acosf_coeff_P8 = f40
+
+acosf_coeff_P1 = f41
+acosf_coeff_P4 = f42
+acosf_coeff_P5 = f43
+acosf_coeff_P2 = f44
+acosf_coeff_P7 = f45
+
+acosf_coeff_P6 = f46
+acosf_coeff_P9 = f47
+acosf_x2 = f48 // second definition of acosf_x2 (see f34 above)
+acosf_x3 = f49
+acosf_x4 = f50
+
+acosf_x8 = f51
+acosf_x5 = f52
+acosf_const_piby2 = f53
+acosf_const_sqrt2by2 = f54
+acosf_x11 = f55
+
+acosf_poly_p1 = f56
+acosf_poly_p3 = f57
+acosf_sinf1 = f58
+acosf_poly_p2 = f59
+acosf_poly_Ax = f60
+
+acosf_poly_p7 = f61
+acosf_poly_p5 = f62
+acosf_sgnx_t4 = f63
+acosf_poly_Bx = f64
+acosf_t = f65 // t = 1 - x*x
+
+acosf_yby2 = f66 // t/2
+acosf_B = f67 // frsqrta estimate of 1/sqrt(t)
+acosf_B2 = f68
+acosf_Az = f69
+acosf_dz = f70
+
+acosf_Sz = f71
+acosf_d2z = f72
+acosf_Fz = f73
+acosf_z = f74 // refined sqrt(t)
+acosf_sgnx_z = f75
+
+acosf_t2 = f76
+acosf_2poly_p4 = f77
+acosf_2poly_p6 = f78
+acosf_2poly_p1 = f79
+acosf_2poly_p2 = f80
+
+acosf_2poly_p8 = f81
+acosf_t4 = f82
+acosf_Pt = f83
+acosf_sgnx_2poly_p2 = f84
+acosf_sgn_x_piby2 = f85 // pi/2 - sgn(x)*pi/2
+
+acosf_poly_p7a = f86
+acosf_2poly_p4a = f87
+acosf_2poly_p4b = f88
+acosf_2poly_p2a = f89
+acosf_poly_p1a = f90
+
+
+
+
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+acosf_coeff_1_table: // doubles read by acosf in pairs with ldfpd: (P1,P4) (P7,P6) (P9,sqrt(2)/2)
+ASM_TYPE_DIRECTIVE(acosf_coeff_1_table,@object)
+data8 0x3FC5555607DCF816 // P1
+data8 0x3F9CF81AD9BAB2C6 // P4
+data8 0x3FC59E0975074DF3 // P7
+data8 0xBFA6F4CC2780AA1D // P6
+data8 0x3FC2DD45292E93CB // P9
+data8 0x3fe6a09e667f3bcd // sqrt(2)/2
+ASM_SIZE_DIRECTIVE(acosf_coeff_1_table)
+
+acosf_coeff_2_table: // read by acosf as ldfpd pairs (P3,P8) (P5,P2), then pi/2 with ldfd
+ASM_TYPE_DIRECTIVE(acosf_coeff_2_table,@object)
+data8 0x3FA6F108E31EFBA6 // P3
+data8 0xBFCA31BF175D82A0 // P8
+data8 0x3FA30C0337F6418B // P5
+data8 0x3FB332C9266CB1F9 // P2
+data8 0x3ff921fb54442d18 // pi_by_2
+ASM_SIZE_DIRECTIVE(acosf_coeff_2_table)
+
+.align 32
+.global acosf
+ASM_TYPE_DIRECTIVE(acosf,@function)
+
+.section .text
+.proc acosf
+.align 32
+
+acosf:
+
+// acosf: x arrives in f8 and the result is returned in f8. Load the addresses of the two tables.
+// Then, load the coefficients and other constants.
+
+{ .mfi
+ alloc r32 = ar.pfs,1,8,4,0 // 1 input, 8 locals (r33-r40), 4 outputs (r41-r44)
+ fnma.s1 acosf_t = f8,f8,f1 // t = 1 - x*x
+ dep.z ACOSF_GR_1by2 = 0x3f,24,8 // 0x3f000000
+}
+{ .mfi
+ addl ACOSF_Addr1 = @ltoff(acosf_coeff_1_table),gp
+ fma.s1 acosf_x2 = f8,f8,f0 // x2 = x*x
+ addl ACOSF_Addr2 = @ltoff(acosf_coeff_2_table),gp ;;
+}
+
+
+{ .mfi
+ ld8 ACOSF_Addr1 = [ACOSF_Addr1]
+ fmerge.s acosf_abs_x = f1,f8 // |x|: sign of f1, magnitude of x
+ dep ACOSF_GR_3by2 = -1,r0,22,8 // 0x3fc00000
+}
+{ .mlx
+ nop.m 999
+ movl ACOSF_GR_5by2 = 0x40200000;;
+}
+
+
+
+{ .mfi
+ setf.s acosf_1by2 = ACOSF_GR_1by2
+ fmerge.s acosf_sgn_x = f8,f1 // sign of x, magnitude of 1.0
+ nop.i 999
+}
+{ .mfi
+ ld8 ACOSF_Addr2 = [ACOSF_Addr2]
+ nop.f 0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ setf.s acosf_5by2 = ACOSF_GR_5by2
+ fcmp.lt.s1 p11,p12 = f8,f0 // p11: x < 0, p12: otherwise (used on the |x|=1 path)
+ nop.i 999;;
+}
+
+{ .mmf
+ ldfpd acosf_coeff_P1,acosf_coeff_P4 = [ACOSF_Addr1],16
+ setf.s acosf_3by2 = ACOSF_GR_3by2
+ fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
+}
+
+
+{ .mfi
+ ldfpd acosf_coeff_P7,acosf_coeff_P6 = [ACOSF_Addr1],16
+ fma.s1 acosf_t2 = acosf_t,acosf_t,f0 // t2 = t*t
+ nop.i 999
+}
+{ .mfi
+ ldfpd acosf_coeff_P3,acosf_coeff_P8 = [ACOSF_Addr2],16
+ fma.s1 acosf_x4 = acosf_x2,acosf_x2,f0 // x4 = x2*x2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd acosf_coeff_P9,acosf_const_sqrt2by2 = [ACOSF_Addr1]
+ fclass.m.unc p10,p0 = f8, 0x07 //@zero
+ nop.i 999
+}
+{ .mfi
+ ldfpd acosf_coeff_P5,acosf_coeff_P2 = [ACOSF_Addr2],16
+ fma.s1 acosf_x3 = f8,acosf_x2,f0 // x3 = x*x2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfd acosf_const_piby2 = [ACOSF_Addr2]
+ frsqrta.s1 acosf_B,p0 = acosf_t // B = initial estimate of 1/sqrt(t)
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p8) fma.s f8 = f8,f1,f0 // NaN input: result is x*1+0
+(p8) br.ret.spnt b0 ;; // Exit if x=nan
+}
+
+
+{ .mfb
+ nop.m 999
+ fcmp.eq.s1 p6,p0 = acosf_abs_x,f1 // p6: |x| = 1
+(p10) br.cond.spnt L(ACOSF_ZERO) ;; // Branch if x=0
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.gt.s1 p9,p0 = acosf_abs_x,f1 // p9: |x| > 1
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_x8 = acosf_x4,acosf_x4,f0 // x8 = x4*x4
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0 // t4 = t2*t2
+(p6) br.cond.spnt L(ACOSF_ABS_ONE) ;; // Branch if |x|=1
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_x5 = acosf_x2,acosf_x3,f0 // x5 = x2*x3
+ nop.i 999
+}
+{ .mfb
+(p9) mov GR_Parameter_TAG = 59
+ fma.s1 acosf_yby2 = acosf_t,acosf_1by2,f0 // yby2 = t/2
+(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_Az = acosf_t,acosf_B,f0 // Az = t*B, first approximation of sqrt(t)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_B2 = acosf_B,acosf_B,f0 // B2 = B*B
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p1 = f8,acosf_coeff_P1,f0 // poly_p1 = x*P1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p1 = acosf_coeff_P1,acosf_t,f1 // 2poly_p1 = t*P1 + 1
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p3 = acosf_coeff_P4,acosf_x2,acosf_coeff_P3 // poly_p3 = x2*P4 + P3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p6 = acosf_coeff_P7,acosf_t,acosf_coeff_P6 // 2poly_p6 = t*P7 + P6
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p7 = acosf_x2,acosf_coeff_P8,acosf_coeff_P7 // poly_p7 = x2*P8 + P7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p2 = acosf_coeff_P3,acosf_t,acosf_coeff_P2 // 2poly_p2 = t*P3 + P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p5 = acosf_x2,acosf_coeff_P6,acosf_coeff_P5 // poly_p5 = x2*P6 + P5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p4 = acosf_coeff_P5,acosf_t,acosf_coeff_P4 // 2poly_p4 = t*P5 + P4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_x11 = acosf_x8,acosf_x3,f0 // x11 = x8*x3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 acosf_dz = acosf_B2,acosf_yby2,acosf_1by2 // dz = 1/2 - B2*(t/2)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p1a = acosf_x2,acosf_poly_p1,f8 // poly_p1a = x2*(x*P1) + x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p8 = acosf_coeff_P9,acosf_t,acosf_coeff_P8 // 2poly_p8 = t*P9 + P8
+ nop.i 999;;
+}
+
+
+// Compare |x| with sqrt(2)/2 to determine the region in which x lies
+
+{ .mfi
+ nop.m 999
+ fcmp.le.s1 p7,p8 = acosf_abs_x,acosf_const_sqrt2by2 // p7: |x| <= sqrt(2)/2, p8: |x| > sqrt(2)/2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p2 = acosf_x2,acosf_poly_p3,acosf_coeff_P2 // poly_p2 = x2*poly_p3 + P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p7a = acosf_x4,acosf_coeff_P9,acosf_poly_p7 // poly_p7a = x4*P9 + poly_p7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p2a = acosf_2poly_p2,acosf_t2,acosf_2poly_p1 // 2poly_p2a = t2*2poly_p2 + 2poly_p1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_sgnx_t4 = acosf_sgn_x,acosf_t4,f0 // sgnx_t4 = sgn(x)*t4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_2poly_p4a = acosf_2poly_p6,acosf_t2,acosf_2poly_p4 // 2poly_p4a = t2*2poly_p6 + 2poly_p4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_Sz = acosf_5by2,acosf_dz,acosf_3by2 // Sz = (5/2)*dz + 3/2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_d2z = acosf_dz,acosf_dz,f0 // d2z = dz*dz
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fnma.d.s1 acosf_sgn_x_piby2 = acosf_sgn_x,acosf_const_piby2,acosf_const_piby2 // pi/2 - sgn(x)*pi/2: 0 for x>0, pi for x<0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 acosf_poly_Ax = acosf_x5,acosf_poly_p2,acosf_poly_p1a // poly_Ax = x5*poly_p2 + poly_p1a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 acosf_poly_Bx = acosf_x4,acosf_poly_p7a,acosf_poly_p5 // poly_Bx = x4*poly_p7a + poly_p5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_sgnx_2poly_p2 = acosf_sgn_x,acosf_2poly_p2a,f0 // sgn(x)*2poly_p2a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_2poly_p4b = acosf_2poly_p8,acosf_t4,acosf_2poly_p4a // 2poly_p4b = t4*2poly_p8 + 2poly_p4a
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_Fz = acosf_d2z,acosf_Sz,acosf_dz // Fz = d2z*Sz + dz
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 acosf_Pt = acosf_2poly_p4b,acosf_sgnx_t4,acosf_sgnx_2poly_p2 // Pt = sgn(x)*(t4*2poly_p4b + 2poly_p2a)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 acosf_z = acosf_Az,acosf_Fz,acosf_Az // z = Az*Fz + Az, refined sqrt(t)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.d.s1 acosf_sinf1 = acosf_x11,acosf_poly_Bx,acosf_poly_Ax // sinf1 = x11*poly_Bx + poly_Ax
+ nop.i 999;;
+}
+
+.pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
+{ .mfi
+ nop.m 999
+(p8) fma.s f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2 // |x| > sqrt(2)/2: result = z*Pt + (0 or pi)
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fms.s f8 = acosf_const_piby2,f1,acosf_sinf1 // |x| <= sqrt(2)/2: result = pi/2 - sinf1
+ br.ret.sptk b0 ;;
+}
+
+L(ACOSF_ZERO):
+// Here if x=0
+{ .mfb
+ nop.m 999
+ fma.s f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
+ br.ret.sptk b0 ;;
+}
+
+
+L(ACOSF_ABS_ONE):
+.pred.rel "mutex",p11,p12
+// Here if |x|=1; p11 (x<0) and p12 were set by the fcmp.lt near the top of acosf
+{ .mfi
+ nop.m 999
+(p11) fma.s f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p12) fma.s f8 = f1,f0,f0 // acosf(1)=0
+ br.ret.sptk b0 ;;
+}
+
+.endp acosf
+ASM_SIZE_DIRECTIVE(acosf)
+
+
+// Stack operations when calling error support.
+// (1) (2)
+// sp -> + psp -> +
+// | |
+// | | <- GR_Y
+// | |
+// | <-GR_Y Y2->|
+// | |
+// | | <- GR_X
+// | |
+// sp-64 -> + sp -> +
+// save ar.pfs save b0
+// save gp
+
+
+// Stack operations when calling error support.
+// (3) (call) (4)
+// psp -> + sp -> +
+// | |
+// R3 ->| <- GR_RESULT | -> f8
+// | |
+// Y2 ->| <- GR_Y |
+// | |
+// X1 ->| |
+// | |
+// sp -> + +
+// restore gp
+// restore ar.pfs
+
+
+.proc __libm_error_region // Error exit: stores x (f8) and two more values on the stack, calls __libm_error_support, returns the stored result in f8
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: incoming sp-32 (= new sp+32)
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 (f1) on stack, then advance GR_Parameter_Y to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mfi
+ nop.m 0
+ frcpa.s0 f9,p0 = f0,f0 // f9 = frcpa of 0/0, stored below as Parameter 3; presumably a NaN with invalid raised -- confirm
+ nop.i 0
+};;
+
+{ .mib
+ stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // GR_Parameter_Y back to the Parameter 2 slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S
new file mode 100644
index 0000000..81f56e4
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acosl.S
@@ -0,0 +1,1094 @@
+.file "acosl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/07/00 Modified calculation of acos_corr to correct acosl
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 12/20/00 Set denormal flag properly.
+//
+// API
+//==============================================================
+// double-extended = acosl (double-extended)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+//
+// predicate registers used:
+// p6 -> p12
+//
+// floating-point registers used:
+// f8 has input, then output
+// f8 -> f15, f32 ->f99
+//
+// general registers used:
+// r32 -> r48
+//
+// Overview of operation
+//==============================================================
+// There are three paths
+// 1. |x| < 2^-25 ACOS_TINY
+// 2. 2^-25 <= |x| < 1/4 ACOS_POLY
+// 3. 1/4 <= |x| < 1 ACOS_ATAN
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+// f8 is input, but acos_V must be put in f8
+// when __libm_atan2_reg is called, f8 must get V
+// f9 gets U when __libm_atan2_reg is called
+
+
+// __libm_atan2_reg returns
+// f8 = Z_hi
+// f10 = Z_lo
+// f11 = s_lo
+
+acos_Z_hi = f8 // NOTE(review): acos_Z_hi and acos_Z_lo are defined again, with the same registers, further down
+acos_Z_lo = f10
+acos_S_lo = f11 // not referenced by the code in this file, which uses acos_s_lo (also f11)
+
+// When we call __libm_atan2_reg, we must save
+// the following:
+
+acos_corr = f12
+acos_X = f13
+acos_pi_hi = f14
+acos_pi_lo = f15
+
+// The rest of the assembly macros
+
+acos_P79 = f32
+acos_P59 = f33
+acos_P39 = f34
+acos_P19 = f35
+
+acos_P810 = f36
+acos_P610 = f37
+acos_P410 = f38
+acos_P210 = f39
+
+acos_A1 = f41
+acos_A2 = f42
+acos_A3 = f43
+acos_A4 = f44
+acos_A5 = f45
+acos_A6 = f46
+acos_A7 = f47
+acos_A8 = f48
+acos_A9 = f49
+acos_A10 = f50
+
+acos_X2 = f51
+acos_X4 = f52
+
+acos_B = f53 // B = 1 + |x|
+acos_Bb = f54 // b = (1 - B) + |x|
+acos_A = f55 // A = 1 - |x|
+acos_Aa = f56 // a = (1 - A) - |x|
+
+acos_1mA = f57
+
+acos_W = f58
+acos_Ww = f59
+
+acos_y0 = f60
+acos_y1 = f61
+acos_y2 = f62
+
+acos_H = f63
+acos_Hh = f64
+
+acos_t1 = f65
+acos_t2 = f66
+acos_t3 = f67
+acos_t4 = f68
+acos_t5 = f69
+
+acos_Pseries = f70
+acos_NORM_f8 = f71
+acos_ABS_NORM_f8 = f72
+
+acos_2 = f73
+acos_P1P2 = f74
+acos_HALF = f75
+acos_U = f76
+
+acos_1mB = f77
+acos_V = f78
+acos_S = f79
+
+acos_BmUU = f80
+acos_BmUUpb = f81
+acos_2U = f82
+acos_1d2U = f83
+
+acos_Dd = f84
+
+acos_pi_by_2_hi = f85
+acos_pi_by_2_lo = f86
+acos_xmpi_by_2_lo = f87
+acos_xPmw = f88
+
+acos_Uu = f89
+acos_AmVV = f90
+acos_AmVVpa = f91
+
+acos_2V = f92
+acos_1d2V = f93
+acos_Vv = f94
+
+acos_Vu = f95
+acos_Uv = f96
+
+acos_2_Z_hi = f97
+acos_s_lo_Z_lo = f98
+acos_result_lo = f99
+
+acos_Z_hi = f8 // repeats the definition at the top of this list
+acos_Z_lo = f10 // repeats the definition at the top of this list
+acos_s_lo = f11
+
+acos_GR_17_ones = r33 // 0x1ffff
+acos_GR_16_ones = r34 // 0xffff
+acos_GR_signexp_f8 = r35
+acos_GR_exp = r36
+acos_GR_true_exp = r37
+acos_GR_fffe = r38 // 0xfffe
+
+GR_SAVE_PFS = r43
+GR_SAVE_B0 = r39
+GR_SAVE_GP = r41
+
+// r40 is address of table of coefficients
+// r42
+
+GR_Parameter_X = r44
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+GR_Parameter_TAG = r47
+
+
+// 2^-40:
+// A true exponent of -40 is
+// : -40 + register_bias
+// : -28 + ffff = ffd7
+
+// A true exponent of 1 is
+// : 1 + register_bias
+// : 1 + ffff = 10000
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+acos_coefficients: // 16-byte entries, read in this order by acosl with ldfe and a post-increment of 16
+ASM_TYPE_DIRECTIVE(acos_coefficients,@object)
+data8 0xc90fdaa22168c234, 0x00003FFF // pi_by_2_hi
+data8 0xc4c6628b80dc1cd1, 0x00003FBF // pi_by_2_lo
+data8 0xc90fdaa22168c234, 0x00004000 // pi_hi
+data8 0xc4c6628b80dc1cd1, 0x00003FC0 // pi_lo
+
+data8 0xBB08911F2013961E, 0x00003FF8 // A10
+data8 0x981F1095A23A87D3, 0x00003FF8 // A9
+data8 0xBDF09C6C4177BCC6, 0x00003FF8 // A8
+data8 0xE4C3A60B049ACCEA, 0x00003FF8 // A7
+data8 0x8E2789F4E8A8F1AD, 0x00003FF9 // A6
+data8 0xB745D09B2B0E850B, 0x00003FF9 // A5
+data8 0xF8E38E3BC4C50920, 0x00003FF9 // A4
+data8 0xB6DB6DB6D89FCD81, 0x00003FFA // A3
+data8 0x99999999999AF376, 0x00003FFB // A2
+data8 0xAAAAAAAAAAAAAA71, 0x00003FFC // A1
+ASM_SIZE_DIRECTIVE(acos_coefficients)
+
+
+.align 32
+.global acosl#
+ASM_TYPE_DIRECTIVE(acosl#,@function)
+
+.section .text
+.proc acosl#
+.align 32
+
+
+acosl:
+
+// After normalizing f8, get its true exponent
+{ .mfi
+ alloc r32 = ar.pfs,1,11,4,0 // 1 input, 11 locals (r33-r43), 4 outputs (r44-r47)
+(p0) fnorm.s1 acos_NORM_f8 = f8
+(p0) mov acos_GR_17_ones = 0x1ffff // mask applied to the getf.exp result below
+}
+
+{ .mmi
+(p0) mov acos_GR_16_ones = 0xffff // exponent bias subtracted below
+(p0) addl r40 = @ltoff(acos_coefficients), gp
+ nop.i 999
+}
+;;
+
+// Set denormal flag on denormal input with fcmp
+{ .mfi
+ ld8 r40 = [r40]
+ fcmp.eq p6,p0 = f8,f0
+ nop.i 999
+}
+;;
+
+
+// Load the constants pi_by_2 and pi.
+// Each is stored as hi and lo values
+// Also load the coefficients for ACOS_POLY
+
+{ .mmi
+(p0) ldfe acos_pi_by_2_hi = [r40],16 ;;
+(p0) ldfe acos_pi_by_2_lo = [r40],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe acos_pi_hi = [r40],16 ;;
+(p0) ldfe acos_pi_lo = [r40],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe acos_A10 = [r40],16 ;;
+(p0) ldfe acos_A9 = [r40],16
+ nop.i 999 ;;
+}
+
+// Take the absolute value of f8
+{ .mmf
+ nop.m 999
+(p0) getf.exp acos_GR_signexp_f8 = acos_NORM_f8
+(p0) fmerge.s acos_ABS_NORM_f8 = f0, acos_NORM_f8 // sign of f0, magnitude of normalized x
+}
+
+{ .mii
+(p0) ldfe acos_A8 = [r40],16
+ nop.i 999 ;;
+(p0) and acos_GR_exp = acos_GR_signexp_f8, acos_GR_17_ones ;;
+}
+
+// case 1: |x| < 2^-25 ==> p6 ACOS_TINY
+// case 2: 2^-25 <= |x| < 2^-2 ==> p8 ACOS_POLY
+// case 3: 2^-2 <= |x| < 1 ==> p9 ACOS_ATAN
+// case 4: 1 <= |x| ==> p11 ACOS_ERROR_RETURN
+// Admittedly |x| = 1 is not an error but this is where that case is
+// handled.
+
+{ .mii
+(p0) ldfe acos_A7 = [r40],16
+(p0) sub acos_GR_true_exp = acos_GR_exp, acos_GR_16_ones ;;
+(p0) cmp.ge.unc p6, p7 = -26, acos_GR_true_exp ;; // p6: true exponent <= -26
+}
+
+{ .mii
+(p0) ldfe acos_A6 = [r40],16
+(p7) cmp.ge.unc p8, p9 = -3, acos_GR_true_exp ;; // p8: true exponent <= -3
+(p9) cmp.ge.unc p10, p11 = -1, acos_GR_true_exp // p10: true exponent <= -1, p11: true exponent >= 0
+}
+
+{ .mmi
+(p0) ldfe acos_A5 = [r40],16 ;;
+(p0) ldfe acos_A4 = [r40],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe acos_A3 = [r40],16 ;;
+(p0) ldfe acos_A2 = [r40],16
+ nop.i 999 ;;
+}
+
+// ACOS_ERROR_RETURN ==> p11 is true
+// case 4: |x| >= 1
+{ .mib
+(p0) ldfe acos_A1 = [r40],16
+ nop.i 999
+(p11) br.spnt L(ACOS_ERROR_RETURN) ;;
+}
+
+// ACOS_TINY ==> p6 is true
+// case 1: |x| < 2^-25
+{ .mfi
+ nop.m 999
+(p6) fms.s1 acos_xmpi_by_2_lo = acos_NORM_f8,f1, acos_pi_by_2_lo // x - pi_by_2_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fms.s0 f8 = acos_pi_by_2_hi,f1, acos_xmpi_by_2_lo // result = pi_by_2_hi - (x - pi_by_2_lo)
+(p6) br.ret.spnt b0 ;;
+}
+
+
+
+// ACOS_POLY ==> p8 is true
+// case 2: 2^-25 <= |x| < 2^-2
+{ .mfi
+ nop.m 999
+(p8) fms.s1 acos_W = acos_pi_by_2_hi, f1, acos_NORM_f8 // W = pi_by_2_hi - x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_X2 = f8,f8, f0 // X2 = x*x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fms.s1 acos_Ww = acos_pi_by_2_hi, f1, acos_W // Ww = pi_by_2_hi - W
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_X4 = acos_X2,acos_X2, f0 // X4 = X2*X2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fms.s1 acos_Ww = acos_Ww, f1, acos_NORM_f8 // Ww = (pi_by_2_hi - W) - x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P810 = acos_X4, acos_A10, acos_A8
+ nop.i 999
+}
+
+// acos_P79 = X4*A9 + A7
+// acos_P810 = X4*A10 + A8
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P79 = acos_X4, acos_A9, acos_A7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_Ww = acos_Ww, f1, acos_pi_by_2_lo // Ww = ((pi_by_2_hi - W) - x) + pi_by_2_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P610 = acos_X4, acos_P810, acos_A6
+ nop.i 999
+}
+
+
+// acos_P59 = X4*(X4*A9 + A7) + A5
+// acos_P610 = X4*(X4*A10 + A8) + A6
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P59 = acos_X4, acos_P79, acos_A5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P410 = acos_X4, acos_P610, acos_A4
+ nop.i 999
+}
+
+// acos_P39 = X4*(X4*(X4*A9 + A7) + A5) + A3
+// acos_P410 = X4*(X4*(X4*A10 + A8) + A6) + A4
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P39 = acos_X4, acos_P59, acos_A3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P210 = acos_X4, acos_P410, acos_A2
+ nop.i 999
+}
+
+// acos_P19 = X4*(X4*(X4*(X4*A9 + A7) + A5) + A3) + A1 = P1
+// acos_P210 = X4*(X4*(X4*(X4*A10 + A8) + A6) + A4) + A2 = P2
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P19 = acos_X4, acos_P39, acos_A1
+ nop.i 999 ;;
+}
+
+// acos_P1P2 = Xsq*P2 + P1
+// acos_P1P2 = Xsq*(Xsq*P2 + P1)
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P1P2 = acos_X2, acos_P210, acos_P19
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acos_P1P2 = acos_X2, acos_P1P2, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fms.s1 acos_xPmw = acos_NORM_f8, acos_P1P2, acos_Ww // xPmw = x*P1P2 - Ww
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fms.s0 f8 = acos_W, f1, acos_xPmw // result = W - (x*P1P2 - Ww)
+(p8) br.ret.spnt b0 ;;
+}
+
+
+// ACOS_ATAN
+// case 3: 2^-2 <= |x| < 1
+// case 3: 2^-2 <= |x| < 1 ==> p9 ACOS_ATAN
+
+// Step 1.1: Get A,B and a,b
+// A + a = 1- |X|
+// B + b = 1+ |X|
+// Note also that we will use acos_corr (f12)
+// and acos_W
+
+// Step 2
+// Call __libm_atan2_reg
+
+
+{ .mfi
+(p0) mov acos_GR_fffe = 0xfffe
+(p0) fma.s1 acos_B = f1,f1, acos_ABS_NORM_f8 // B = 1 + |x|
+(p0) mov GR_SAVE_B0 = b0 ;;
+}
+
+{ .mmf
+(p0) mov GR_SAVE_GP = gp
+ nop.m 999
+(p0) fms.s1 acos_A = f1,f1, acos_ABS_NORM_f8 // A = 1 - |x|
+}
+
+{ .mfi
+(p0) setf.exp acos_HALF = acos_GR_fffe // HALF built from exponent 0xfffe (bias 0xffff minus 1)
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 acos_1mB = f1,f1, acos_B // 1 - B
+ nop.i 999 ;;
+}
+
+// We want atan2(V,U)
+// so put V in f8 and U in f9
+// but save X in acos_X
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.se acos_X = f8, f8
+ nop.i 999 ;;
+}
+
+// Step 1.2:
+/////////////////////////
+// Get U = sqrt(B)
+/////////////////////////
+
+{ .mfi
+ nop.m 999
+(p0) frsqrta.s1 acos_y0,p8 = acos_B // y0 = initial estimate of 1/sqrt(B)
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 acos_1mA = f1,f1, acos_A // 1 - A
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_Bb = acos_1mB,f1, acos_ABS_NORM_f8 // b = (1 - B) + |x|
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_Hh = acos_HALF, acos_B, f0 // Hh = B/2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_t1 = acos_y0, acos_y0, f0 // t1 = y0*y0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 acos_Aa = acos_1mA,f1, acos_ABS_NORM_f8 // a = (1 - A) - |x|
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_t2 = acos_t1, acos_Hh, acos_HALF // t2 = 1/2 - t1*Hh
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_y1 = acos_t2, acos_y0, acos_y0 // y1 = t2*y0 + y0
+ nop.i 999
+}
+
+
+// Step 1.2:
+/////////////////////////
+// Get V = sqrt(A)
+/////////////////////////
+{ .mfi
+ nop.m 999
+(p0) frsqrta.s1 acos_y0,p8 = acos_A // y0 reused: initial estimate of 1/sqrt(A)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_t3 = acos_y1, acos_Hh, f0 // t3 = y1*Hh (still the B iteration)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_t1 = acos_y0, acos_y0, f0 // t1 = y0*y0 (A iteration)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_t4 = acos_t3, acos_y1, acos_HALF // t4 = 1/2 - t3*y1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_y2 = acos_t4, acos_y1, acos_y1 // y2 = t4*y1 + y1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_S = acos_B, acos_y2, f0 // S = B*y2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_H = acos_y2, acos_HALF, f0 // H = y2/2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_t5 = acos_Hh, acos_y2, f0 // NOTE(review): acos_t5 is written but not read anywhere in this file
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_Hh = acos_HALF, acos_A, f0 // Hh = A/2 (switches Hh to the A iteration)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_Dd = acos_S, acos_S, acos_B // Dd = B - S*S
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_t2 = acos_t1, acos_Hh, acos_HALF // t2 = 1/2 - t1*Hh
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_U = acos_Dd, acos_H, acos_S // U = Dd*H + S
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_y1 = acos_t2, acos_y0, acos_y0 // y1 = t2*y0 + y0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_2U = acos_U, f1, acos_U // 2U = U + U
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_t3 = acos_y1, acos_Hh, f0 // t3 = y1*Hh
+ nop.i 999
+}
+
+
+// Step 1.3:
+// sqrt(A + a) = V + v
+// sqrt(B + b) = U + u
+
+/////////////////////////
+// Get u
+/////////////////////////
+
+// acos_BmUU = B - UU
+// acos_BmUUpb = (B - UU) + b
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_BmUU = acos_U, acos_U, acos_B
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.se f9 = acos_U, acos_U // f9 = U for __libm_atan2_reg
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_t4 = acos_t3, acos_y1, acos_HALF // t4 = 1/2 - t3*y1
+ nop.i 999 ;;
+}
+
+// acos_1d2U = frcpa(2U)
+{ .mfi
+ nop.m 999
+(p0) frcpa.s1 acos_1d2U,p9 = f1, acos_2U
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_BmUUpb = acos_BmUU, f1, acos_Bb
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_y2 = acos_t4, acos_y1, acos_y1 // y2 = t4*y1 + y1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+// acos_Uu = ((B - UU) + b) * frcpa(2U)
+(p0) fma.s1 acos_Uu = acos_BmUUpb, acos_1d2U, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_S = acos_A, acos_y2, f0 // S = A*y2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_H = acos_y2, acos_HALF, f0 // H = y2/2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_t5 = acos_Hh, acos_y2, f0 // NOTE(review): acos_t5 is written but not read anywhere in this file
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_Dd = acos_S, acos_S, acos_A // Dd = A - S*S
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_V = acos_Dd, acos_H, acos_S // V = Dd*H + S
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_2V = acos_V, f1, acos_V // 2V = V + V
+ nop.i 999
+}
+
+// Step 3
+/////////////////////////
+// Calculate the correction, acos_corr
+/////////////////////////
+// acos_corr = U*v - (V*u)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_Vu = acos_V,acos_Uu, f0 // Vu = V*u
+ nop.i 999 ;;
+}
+
+/////////////////////////
+// Get v
+/////////////////////////
+// acos_AmVV = A - VV
+// acos_AmVVpa = (A - VV) + a
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 acos_AmVV = acos_V, acos_V, acos_A
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.se f8 = acos_V, acos_V // f8 = V for __libm_atan2_reg
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_AmVVpa = acos_AmVV, f1, acos_Aa
+ nop.i 999 ;;
+}
+
+// acos_1d2V = frcpa(2V)
+{ .mfi
+ nop.m 999
+(p0) frcpa.s1 acos_1d2V,p9 = f1, acos_2V
+ nop.i 999 ;;
+}
+
+// acos_Vv = ((A - VV) + a) * frcpa(2V)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_Vv = acos_AmVVpa, acos_1d2V, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_Uv = acos_U,acos_Vv, f0 // Uv = U*v; no branch or return follows, so execution continues past .endp
+ nop.i 999 ;;
+}
+
+
+.endp acosl#
+ASM_SIZE_DIRECTIVE(acosl#)
+
+
+.proc __libm_callout // Calls __libm_atan2_reg with f8 and f9 as set by acosl, then assembles the acosl result in f8
+__libm_callout:
+.prologue
+{ .mfi
+ nop.m 0
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+;;
+
+{ .mfi
+ mov GR_SAVE_GP=gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+}
+
+.body
+{ .mfb
+ nop.m 999
+(p0) fms.s1 acos_corr = acos_Uv,f1, acos_Vu // corr = U*v - V*u
+(p0) br.call.sptk.many b0=__libm_atan2_reg# ;;
+}
+
+
+// p6 ==> X is negative
+// p7 ==> x is positive
+// We know that |X| >= 1/4
+
+{ .mfi
+(p0) mov gp = GR_SAVE_GP
+(p0) fcmp.lt.unc p6,p7 = acos_X , f0
+(p0) mov b0 = GR_SAVE_B0 ;;
+}
+
+// acos_2_Z_hi = 2 * acos_Z_hi
+// acos_s_lo_Z_lo = s_lo * Z_lo
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_2_Z_hi = acos_Z_hi, f1, acos_Z_hi
+(p0) mov ar.pfs = GR_SAVE_PFS
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_s_lo_Z_lo = acos_s_lo, acos_Z_lo, f0
+ nop.i 999 ;;
+}
+
+// 2 is a constant needed later
+{ .mfi
+ nop.m 999
+(p0) fma.s1 acos_2 = f1,f1,f1
+ nop.i 999 ;;
+}
+
+// X >= 1/4
+// acos_result_lo = 2(s_lo * Z_lo) + corr
+// f8 = (2*Z_hi) + (2(s_lo * Z_lo) + corr)
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 acos_result_lo = acos_s_lo_Z_lo, acos_2, acos_corr
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s0 f8 = acos_2_Z_hi, f1, acos_result_lo
+ nop.i 999
+}
+
+// acos_result_lo = (pi_lo - corr)
+// acos_result_lo = (pi_lo - corr) + acos_Ww
+{ .mfi
+ nop.m 999
+(p6) fms.s1 acos_result_lo = acos_pi_lo, f1, acos_corr
+ nop.i 999 ;;
+}
+
+// X <= -1/4
+// acos_W = pi_hi - 2 * Z_hi
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 acos_W = acos_2, acos_Z_hi, acos_pi_hi
+ nop.i 999 ;;
+}
+
+// acos_Ww = pi_hi - W
+// acos_Ww = (pi_hi - W) - (2 * Z_hi)
+{ .mfi
+ nop.m 999
+(p6) fms.s1 acos_Ww = acos_pi_hi, f1, acos_W
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 acos_Ww = acos_Ww, f1, acos_2_Z_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 acos_result_lo = acos_result_lo, f1, acos_Ww
+ nop.i 999 ;;
+}
+
+// acos_Z_lo = ((pi_lo - corr) + acos_Ww) - 2 * (s_lo * Z_lo)
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 acos_Z_lo = acos_s_lo_Z_lo, acos_2, acos_result_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fma.s0 f8 = acos_W, f1, acos_Z_lo // X negative: result = W + Z_lo
+(p0) br.ret.sptk b0 ;;
+}
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+
+.proc SPECIAL // Special-case exits of acosl: |x| = 1, NaN input, and setup for the error call when |x| > 1
+SPECIAL:
+L(ACOS_NAN): // NOTE(review): no branch in this file targets this label
+{ .mfb
+ nop.m 999
+(p0) fma.s0 f8 = f8,f1,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+L(ACOS_ERROR_RETURN):
+// Save ar.pfs, b0, and gp; restore on exit
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 0 0 0 0 11 = 0xc3
+
+// Reached from acosl when p11 is set (true exponent of x >= 0)
+// What should we return?
+
+// If |X| is 1, return pi for X = -1 and +0 for X = +1
+
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc p6,p7 = acos_ABS_NORM_f8,f1 // p6: |x| = 1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.lt.unc p8,p9 = f8,f0 // within |x| = 1: p8 for x < 0, p9 otherwise
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s0 f8 = acos_pi_hi, f1, acos_pi_lo // x = -1: result = pi_hi + pi_lo
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p9) fmerge.s f8 = f8,f0 // x = +1: result = zero with the sign of x
+(p6) br.ret.spnt b0 ;;
+}
+
+// If X is a NAN, leave
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p12,p0 = f8, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) fma.s0 f8 = f8,f1,f0
+(p12) br.ret.spnt b0 ;;
+}
+
+{ .mfi
+(p0) mov GR_Parameter_TAG = 57
+(p0) frcpa f10, p6 = f0, f0 // f10 = frcpa of 0/0, stored as Parameter 3 by the code that follows .endp
+nop.i 999
+};;
+
+.endp SPECIAL
+ASM_SIZE_DIRECTIVE(SPECIAL)
+
+.proc __libm_error_region // Error exit: stores x (f8), f1 and f10 on the stack, calls __libm_error_support, returns the stored result in f8
+__libm_error_region:
+.prologue
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: incoming sp-32 (= new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfe [GR_Parameter_Y] = f1,16 // Store Parameter 2 (f1) on stack, then advance GR_Parameter_Y to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfe [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // GR_Parameter_Y back to the Parameter 2 slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+
+// (4)
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
+
+.type __libm_atan2_reg#,@function
+.global __libm_atan2_reg#
diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S
new file mode 100644
index 0000000..cd19fce
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_asin.S
@@ -0,0 +1,884 @@
+.file "asin.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 8/17/00 New and much faster algorithm.
+// 8/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
+// fixed mfb split issue stalls.
+// 12/19/00 Fixed small arg cases to force inexact, or inexact and underflow.
+
+// Description
+//=========================================
+// The asin function computes the principal value of the arc sine of x.
+// asin(0) returns 0, asin(1) returns pi/2, asin(-1) returns -pi/2.
+// A domain error occurs for arguments not in the range [-1,+1].
+
+// The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
+
+#include "libm_support.h"
+
+//
+// Assembly macros
+//=========================================
+
+
+// predicate registers (kept as comments; the code uses p7 and p8 directly)
+//asin_pred_LEsqrt2by2 = p7
+//asin_pred_GTsqrt2by2 = p8
+
+// integer registers (asin allocates 1 input r32, 6 locals r33-r38, 4 outputs r39-r42)
+ASIN_Addr1 = r33
+ASIN_Addr2 = r34
+ASIN_FFFE = r35
+ASIN_lnorm_sig = r36
+ASIN_snorm_exp = r37
+// Saved state for the error path; r36 and r37 are shared with the two names above
+GR_SAVE_B0 = r36
+GR_SAVE_PFS = r37
+GR_SAVE_GP = r38
+// Outgoing arguments for the __libm_error_support call
+GR_Parameter_X = r39
+GR_Parameter_Y = r40
+GR_Parameter_RESULT = r41
+GR_Parameter_Tag = r42
+
+// floating point registers
+asin_coeff_P1 = f32
+asin_coeff_P2 = f33
+asin_coeff_P3 = f34
+asin_coeff_P4 = f35
+
+asin_coeff_P5 = f36
+asin_coeff_P6 = f37
+asin_coeff_P7 = f38
+asin_coeff_P8 = f39
+asin_coeff_P9 = f40
+
+asin_coeff_P10 = f41
+asin_coeff_P11 = f42
+asin_coeff_P12 = f43
+asin_coeff_P13 = f44
+asin_coeff_P14 = f45
+
+asin_coeff_P15 = f46
+asin_coeff_P16 = f47
+asin_coeff_P17 = f48
+asin_coeff_P18 = f49
+asin_coeff_P19 = f50
+
+asin_coeff_P20 = f51
+asin_coeff_P21 = f52
+asin_const_sqrt2by2 = f53
+asin_const_piby2 = f54
+asin_abs_x = f55
+// tx = x*x and its powers; the 1poly names hold the partial sums of the series in tx
+asin_tx = f56
+asin_tx2 = f57
+asin_tx3 = f58
+asin_tx4 = f59
+asin_tx8 = f60
+
+asin_tx11 = f61
+asin_1poly_p8 = f62
+asin_1poly_p19 = f63
+asin_1poly_p4 = f64
+asin_1poly_p15 = f65
+
+asin_1poly_p6 = f66
+asin_1poly_p17 = f67
+asin_1poly_p0 = f68
+asin_1poly_p11 = f69
+asin_1poly_p2 = f70
+// t = 1 - x*x and its powers; the poly names hold the partial sums of the series in t
+asin_1poly_p13 = f71
+asin_series_tx = f72
+asin_t = f73
+asin_t2 = f74
+asin_t3 = f75
+
+asin_t4 = f76
+asin_t8 = f77
+asin_t11 = f78
+asin_poly_p8 = f79
+asin_poly_p19 = f80
+
+asin_poly_p4 = f81
+asin_poly_p15 = f82
+asin_poly_p6 = f83
+asin_poly_p17 = f84
+asin_poly_p0 = f85
+
+asin_poly_p11 = f86
+asin_poly_p2 = f87
+asin_poly_p13 = f88
+asin_series_t = f89
+asin_1by2 = f90
+// Small rational constants built arithmetically from 0.5 and 1.0 inside asin
+asin_3by2 = f91
+asin_5by2 = f92
+asin_11by4 = f93
+asin_35by8 = f94
+asin_63by8 = f95
+
+asin_231by16 = f96
+asin_y0 = f97
+asin_H0 = f98
+asin_S0 = f99
+asin_d = f100
+
+asin_l1 = f101
+asin_d2 = f102
+asin_T0 = f103
+asin_d1 = f104
+asin_e0 = f105
+
+asin_l2 = f106
+asin_d3 = f107
+asin_T3 = f108
+asin_S1 = f109
+asin_e1 = f110
+
+asin_z = f111
+answer2 = f112
+asin_sgn_x = f113
+asin_429by16 = f114
+asin_18by4 = f115
+
+asin_3by4 = f116
+asin_l3 = f117
+asin_T6 = f118
+asin_eps_exp = f119
+asin_eps_sig = f120
+asin_eps = f120 // same register as asin_eps_sig: fmerge.se in asin combines exponent and significand in place
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+// Each entry is two data8 words (significand, then sign/exponent) read by one ldfe with a 16-byte step; order must match the ASIN_Addr1 load sequence in asin
+asin_coeff_1_table:
+ASM_TYPE_DIRECTIVE(asin_coeff_1_table,@object)
+data8 0xE4E7E0A423A21249 , 0x00003FF8 //P7
+data8 0xC2F7EE0200FCE2A5 , 0x0000C003 //P18
+data8 0xB745D7F6C65C20E0 , 0x00003FF9 //P5
+data8 0xF75E381A323D4D94 , 0x0000C002 //P16
+data8 0x8959C2629C1024C0 , 0x0000C002 //P20
+data8 0xAFF68E7D241292C5 , 0x00003FF8 //P9
+data8 0xB6DB6DB7260AC30D , 0x00003FFA //P3
+data8 0xD0417CE2B41CB7BF , 0x0000C000 //P14
+data8 0x81D570FEA724E3E4 , 0x0000BFFD //P12
+data8 0xAAAAAAAAAAAAC277 , 0x00003FFC //P1
+data8 0xF534912FF3E7B76F , 0x00003FFF //P21
+data8 0xc90fdaa22168c235 , 0x00003fff // pi/2
+data8 0x0000000000000000 , 0x00000000 // pad to avoid data bank conflict
+ASM_SIZE_DIRECTIVE(asin_coeff_1_table)
+
+// Second table, same entry format; order must match the ASIN_Addr2 load sequence in asin
+asin_coeff_2_table:
+ASM_TYPE_DIRECTIVE(asin_coeff_2_table,@object)
+data8 0x8E26AF5F29B39A2A , 0x00003FF9 //P6
+data8 0xB4F118A4B1015470 , 0x00004003 //P17
+data8 0xF8E38E10C25990E0 , 0x00003FF9 //P4
+data8 0x80F50489AEF1CAC6 , 0x00004002 //P15
+data8 0x92728015172CFE1C , 0x00004003 //P19
+data8 0xBBC3D831D4595971 , 0x00003FF8 //P8
+data8 0x999999999952A5C3 , 0x00003FFB //P2
+data8 0x855576BE6F0975EC , 0x00003FFF //P13
+data8 0xF12420E778077D89 , 0x00003FFA //P11
+data8 0xB6590FF4D23DE003 , 0x00003FF3 //P10
+data8 0xb504f333f9de6484 , 0x00003ffe // sqrt(2)/2
+ASM_SIZE_DIRECTIVE(asin_coeff_2_table)
+
+
+
+.align 32
+.global asin
+
+.section .text
+.proc asin
+.align 32
+// asin: f8 holds x on entry and the double-precision result on return.
+// |x| <= sqrt(2)/2 returns x * series(x*x); larger |x| returns sign(x) * (pi/2 - z * series(1 - x*x)), z approximating sqrt(1 - x*x).
+asin:
+
+{ .mfi
+ alloc r32 = ar.pfs,1,6,4,0
+ fma.s1 asin_tx = f8,f8,f0 // tx = x*x
+ addl ASIN_Addr2 = @ltoff(asin_coeff_2_table),gp // linkage-table slot for table 2
+}
+{ .mfi
+ mov ASIN_FFFE = 0xFFFE // biased exponent of 0.5
+ fnma.s1 asin_t = f8,f8,f1 // t = 1 - x*x
+ addl ASIN_Addr1 = @ltoff(asin_coeff_1_table),gp // linkage-table slot for table 1
+}
+;;
+
+
+{ .mfi
+ setf.exp asin_1by2 = ASIN_FFFE // 0.5
+ fmerge.s asin_abs_x = f1,f8 // |x|: sign of 1.0, magnitude of x
+ nop.i 999 ;;
+}
+
+{ .mmf
+ ld8 ASIN_Addr1 = [ASIN_Addr1] // table 1 address
+ ld8 ASIN_Addr2 = [ASIN_Addr2] // table 2 address
+ fmerge.s asin_sgn_x = f8,f1 ;; // 1.0 carrying the sign of x
+}
+
+// Coefficient loads below walk both tables in storage order, interleaved with the first arithmetic
+{ .mfi
+ ldfe asin_coeff_P7 = [ASIN_Addr1],16
+ fma.s1 asin_tx2 = asin_tx,asin_tx,f0
+ nop.i 999
+}
+{ .mfi
+ ldfe asin_coeff_P6 = [ASIN_Addr2],16
+ fma.s1 asin_t2 = asin_t,asin_t,f0
+ nop.i 999;;
+}
+
+
+{ .mmf
+ ldfe asin_coeff_P18 = [ASIN_Addr1],16
+ ldfe asin_coeff_P17 = [ASIN_Addr2],16
+ fclass.m.unc p8,p0 = f8, 0xc3 //@qnan |@snan
+}
+;;
+
+{ .mmf
+ ldfe asin_coeff_P5 = [ASIN_Addr1],16
+ ldfe asin_coeff_P4 = [ASIN_Addr2],16
+ frsqrta.s1 asin_y0,p0 = asin_t // y0: reciprocal square root approximation of t
+}
+;;
+
+{ .mfi
+ ldfe asin_coeff_P16 = [ASIN_Addr1],16
+ fcmp.gt.s1 p9,p0 = asin_abs_x,f1 // p9: |x| > 1
+ nop.i 999
+}
+{ .mfb
+ ldfe asin_coeff_P15 = [ASIN_Addr2],16
+(p8) fma.d f8 = f8,f1,f0 // NaN input: result is x*1+0
+(p8) br.ret.spnt b0 // Exit if x is a NaN
+}
+;;
+
+
+{ .mmf
+ ldfe asin_coeff_P20 = [ASIN_Addr1],16
+ ldfe asin_coeff_P19 = [ASIN_Addr2],16
+ fclass.m.unc p8,p0 = f8, 0x07 //@zero
+}
+;;
+
+
+{ .mfi
+ ldfe asin_coeff_P9 = [ASIN_Addr1],16
+ fma.s1 asin_t4 = asin_t2,asin_t2,f0
+(p9) mov GR_Parameter_Tag = 61 // tag handed to __libm_error_support on the |x| > 1 path
+}
+{ .mfi
+ ldfe asin_coeff_P8 = [ASIN_Addr2],16
+ fma.s1 asin_3by2 = asin_1by2,f1,f1 // 0.5 + 1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe asin_coeff_P2 = [ASIN_Addr2],16
+ fma.s1 asin_tx4 = asin_tx2,asin_tx2,f0
+ nop.i 999
+}
+{ .mfb
+ ldfe asin_coeff_P3 = [ASIN_Addr1],16
+ fma.s1 asin_t3 = asin_t,asin_t2,f0
+(p8) br.ret.spnt b0 // Exit if x is zero; f8 still holds x
+}
+;;
+
+
+{ .mfi
+ ldfe asin_coeff_P13 = [ASIN_Addr2],16
+ fma.s1 asin_H0 = asin_y0,asin_1by2,f0 // H0 = y0/2
+ nop.i 999
+}
+{ .mfb
+ ldfe asin_coeff_P14 = [ASIN_Addr1],16
+ fma.s1 asin_S0 = asin_y0,asin_t,f0 // S0 = y0*t, first estimate of sqrt(t)
+(p9) br.cond.spnt __libm_error_region // Branch if |x| > 1
+}
+;;
+
+
+{ .mfi
+ ldfe asin_coeff_P11 = [ASIN_Addr2],16
+ fcmp.eq.s1 p6,p0 = asin_abs_x,f1 // p6: |x| = 1
+ nop.i 999
+}
+{ .mfi
+ ldfe asin_coeff_P12 = [ASIN_Addr1],16
+ fma.s1 asin_tx3 = asin_tx,asin_tx2,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe asin_coeff_P10 = [ASIN_Addr2],16
+ fma.s1 asin_1poly_p6 = asin_tx,asin_coeff_P7,asin_coeff_P6
+ nop.i 999
+}
+{ .mfi
+ ldfe asin_coeff_P1 = [ASIN_Addr1],16
+ fma.s1 asin_poly_p6 = asin_t,asin_coeff_P7,asin_coeff_P6
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe asin_const_sqrt2by2 = [ASIN_Addr2],16
+ fma.s1 asin_5by2 = asin_3by2,f1,f1 // 1.5 + 1
+ nop.i 999
+}
+{ .mfi
+ ldfe asin_coeff_P21 = [ASIN_Addr1],16
+ fma.s1 asin_11by4 = asin_3by2,asin_3by2,asin_1by2 // 1.5*1.5 + 0.5
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfe asin_const_piby2 = [ASIN_Addr1],16
+ fma.s1 asin_poly_p17 = asin_t,asin_coeff_P18,asin_coeff_P17
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 asin_3by4 = asin_3by2,asin_1by2,f0 // 1.5*0.5
+(p6) br.cond.spnt L(ASIN_ABS_1) // Branch to short exit if |x|=1
+}
+;;
+
+
+{ .mfi
+ addl ASIN_lnorm_sig = -0x1,r0 // Form significand 0xffffffffffffffff
+ fma.s1 asin_poly_p15 = asin_t,asin_coeff_P16,asin_coeff_P15
+ nop.i 999
+}
+{ .mfi
+ addl ASIN_snorm_exp = 0x0c001,r0 // Form small exponent
+ fnma.s1 asin_d = asin_S0,asin_H0,asin_1by2 // d = 0.5 - S0*H0
+ nop.i 999;;
+}
+
+
+// Form the exponent and significand of a small number
+{ .mfi
+ setf.sig asin_eps_sig = ASIN_lnorm_sig
+ fma.s1 asin_poly_p19 = asin_t,asin_coeff_P20,asin_coeff_P19
+ nop.i 999
+}
+{ .mfi
+ setf.exp asin_eps_exp = ASIN_snorm_exp
+ fma.s1 asin_poly_p4 = asin_t,asin_coeff_P5,asin_coeff_P4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p17 = asin_tx,asin_coeff_P18,asin_coeff_P17
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p8 = asin_t,asin_coeff_P9,asin_coeff_P8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fms.s1 asin_35by8 = asin_5by2,asin_11by4,asin_5by2 // 2.5*2.75 - 2.5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_63by8 = asin_5by2,asin_11by4,f1 // 2.5*2.75 + 1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p13 = asin_t,asin_coeff_P14,asin_coeff_P13
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_18by4 = asin_3by2,asin_5by2,asin_3by4 // 1.5*2.5 + 0.75
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_l1 = asin_5by2,asin_d,asin_3by2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_d2 = asin_d,asin_d,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p15 = asin_t2,asin_poly_p17,asin_poly_p15
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_T0 = asin_d,asin_S0,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p19 = asin_t2,asin_coeff_P21,asin_poly_p19
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p4 = asin_t2,asin_poly_p6,asin_poly_p4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_d1 = asin_35by8,asin_d,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_231by16 = asin_3by2,asin_35by8,asin_63by8 // 1.5*4.375 + 7.875
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p2 = asin_t,asin_coeff_P3,asin_coeff_P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p8 = asin_t2,asin_coeff_P10,asin_poly_p8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p11 = asin_t,asin_coeff_P12,asin_coeff_P11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_e0 = asin_d2,asin_l1,asin_d
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p15 = asin_tx,asin_coeff_P16,asin_coeff_P15
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p0 = asin_t,asin_coeff_P1,f1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p19 = asin_tx,asin_coeff_P20,asin_coeff_P19
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p4 = asin_tx,asin_coeff_P5,asin_coeff_P4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p8 = asin_tx,asin_coeff_P9,asin_coeff_P8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_l2 = asin_231by16,asin_d,asin_63by8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_d3 = asin_d2,asin_d,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_T3 = asin_d2,asin_T0,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_429by16 = asin_18by4,asin_11by4,asin_231by16 // 4.5*2.75 + 14.4375
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_S1 = asin_e0,asin_S0,asin_S0 // S1 = S0*(1 + e0)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p4 = asin_t4,asin_poly_p8,asin_poly_p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p15 = asin_t4,asin_poly_p19,asin_poly_p15
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p0 = asin_t2,asin_poly_p2,asin_poly_p0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p11 = asin_t2,asin_poly_p13,asin_poly_p11
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_t8 = asin_t4,asin_t4,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_e1 = asin_d2,asin_l2,asin_d1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p4 = asin_tx2,asin_1poly_p6,asin_1poly_p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p15 = asin_tx2,asin_1poly_p17,asin_1poly_p15
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p8 = asin_tx2,asin_coeff_P10,asin_1poly_p8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p19 = asin_tx2,asin_coeff_P21,asin_1poly_p19
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p2 = asin_tx,asin_coeff_P3,asin_coeff_P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p13 = asin_tx,asin_coeff_P14,asin_coeff_P13
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p0 = asin_tx,asin_coeff_P1,f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p11 = asin_tx,asin_coeff_P12,asin_coeff_P11
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_l3 = asin_429by16,asin_d,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_z = asin_e1,asin_T3,asin_S1 // z = S1 + e1*T3
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p11 = asin_t4,asin_poly_p15,asin_poly_p11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_T6 = asin_T3,asin_d3,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_t11 = asin_t8,asin_t3,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_poly_p0 = asin_t4,asin_poly_p4,asin_poly_p0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p4 = asin_tx4,asin_1poly_p8,asin_1poly_p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p15 = asin_tx4,asin_1poly_p19,asin_1poly_p15
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p0 = asin_tx2,asin_1poly_p2,asin_1poly_p0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p11 = asin_tx2,asin_1poly_p13,asin_1poly_p11
+ nop.i 999;;
+}
+
+// Select the result path: p7 for |x| <= sqrt(2)/2, p8 otherwise
+{ .mfi
+ nop.m 999
+// fcmp.le.s1 asin_pred_LEsqrt2by2,asin_pred_GTsqrt2by2 = asin_abs_x,asin_const_sqrt2by2
+ fcmp.le.s1 p7,p8 = asin_abs_x,asin_const_sqrt2by2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_tx8 = asin_tx4,asin_tx4,f0
+ nop.i 999;;
+}
+
+
+// Form a small number to force inexact flag for small args
+{ .mfi
+ nop.m 999
+ fmerge.se asin_eps = asin_eps_exp,asin_eps_sig
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_z = asin_l3,asin_T6,asin_z // z += l3*T6, last correction to the sqrt(t) estimate
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_series_t = asin_t11,asin_poly_p11,asin_poly_p0 // full series in t
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p0 = asin_tx4,asin_1poly_p4,asin_1poly_p0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asin_1poly_p11 = asin_tx4,asin_1poly_p15,asin_1poly_p11
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_tx11 = asin_tx8,asin_tx3,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+//(asin_pred_GTsqrt2by2) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
+(p8) fnma.s1 answer2 = asin_z,asin_series_t,asin_const_piby2
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asin_series_tx = asin_tx11,asin_1poly_p11,asin_1poly_p0 // full series in tx
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+//(asin_pred_GTsqrt2by2) fma.d f8 = asin_sgn_x,answer2,f0
+(p8) fma.d f8 = asin_sgn_x,answer2,f0
+ nop.i 999;;
+}
+
+// asin_eps is added only to force inexact and possibly underflow flag
+// in case asin_series_tx is zero
+//
+{ .mfi
+ nop.m 999
+(p7) fma.d asin_eps = f8,asin_series_tx,asin_eps
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//(asin_pred_LEsqrt2by2) fma.d f8 = f8,asin_series_tx,f0
+(p7) fma.d f8 = f8,asin_series_tx,f0
+ br.ret.sptk b0
+}
+;;
+
+
+L(ASIN_ABS_1):
+// Here for short exit if |x|=1
+{ .mfb
+ nop.m 999
+ fma.d f8 = asin_sgn_x,asin_const_piby2,f0 // pi/2 with the sign of x
+ br.ret.sptk b0
+}
+;;
+
+
+.endp asin
+ASM_SIZE_DIRECTIVE(asin)
+
+.proc __libm_error_region
+__libm_error_region: // Reached from asin when |x| > 1: builds the argument slots and calls __libm_error_support
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack; NOTE(review): single-precision store in a double routine, slot presumably unused - confirm
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+ frcpa.s0 f9,p0 = f0,f0 // f9 = result of 0/0 in status field 0, stored below as the default result
+;;
+
+{ .mib
+ stfd [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f9,-16 // Store Parameter 3 on stack
+ adds r32 = 48,sp // Keep the result address in r32 for the reload after the call
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ ldfd f8 = [r32] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+// External error handler called above
+.type __libm_error_support,@function
+.global __libm_error_support
diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S
new file mode 100644
index 0000000..011dc9e
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_asinf.S
@@ -0,0 +1,674 @@
+.file "asinf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/02/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+// History
+//==============================================================
+// 2/02/00 Initial revision
+// 6/28/00 Improved speed
+// 6/31/00 Changed register allocation because of some duplicate macros
+// moved nan exit bundle up to gain a cycle.
+// 8/08/00 Improved speed by avoiding SIR flush.
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 8/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
+
+// Description
+//=========================================
+// The asinf function computes the arc sine of x in the range [-pi/2,+pi/2].
+// A domain error occurs for arguments not in the range [-1,+1].
+// asinf(+-0) returns +-0
+// asinf(x) returns a Nan and raises the invalid exception for |x| >1
+
+// The acosf function returns the arc cosine in the range [0, +pi] radians.
+// A domain error occurs for arguments not in the range [-1,+1].
+// acosf(1) returns +0
+// acosf(x) returns a Nan and raises the invalid exception for |x| >1
+
+
+// |x| <= sqrt(2)/2. get Ax and Bx
+
+// poly_p1 = x p1
+// poly_p3 = x2 p4 + p3
+// poly_p1 = x2 (poly_p1) + x = x2(x p1) + x
+// poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2
+
+// poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x
+// = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+
+// poly_p7 = x2 p8 + p7
+// poly_p5 = x2 p6 + p5
+
+// poly_p7 = x4 p9 + (poly_p7)
+// poly_p7 = x4 p9 + (x2 p8 + p7)
+// poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5
+
+// answer1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+// = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x
+
+
+
+// |x| > sqrt(2)/2
+
+// Get z = sqrt(1-x2)
+
+// Get polynomial in t = 1-x2
+
+// t2 = t t
+// t4 = t2 t2
+
+// poly_p4 = t p5 + p4
+// poly_p1 = t p1 + 1
+
+// poly_p6 = t p7 + p6
+// poly_p2 = t p3 + p2
+
+// poly_p8 = t p9 + p8
+
+// poly_p4 = t2 poly_p6 + poly_p4
+// = t2 (t p7 + p6) + (t p5 + p4)
+
+// poly_p2 = t2 poly_p2 + poly_p1
+// = t2 (t p3 + p2) + (t p1 + 1)
+
+// poly_p4 = t4 poly_p8 + poly_p4
+// = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))
+
+// P(t) = poly_p2 + t4 poly_p4
+// = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)))
+// = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4
+
+
+// answer2 = - sign(x) z P(t) + (sign(x) pi/2)
+//
+
+#include "libm_support.h"
+
+// Assembly macros
+//=========================================
+
+// predicate registers (kept as comments; the code uses p7 and p8 directly)
+//asinf_pred_LEsqrt2by2 = p7
+//asinf_pred_GTsqrt2by2 = p8
+
+// integer registers (asinf allocates 1 input r32, 8 locals r33-r40, 4 outputs r41-r44)
+ASINF_Addr1 = r33
+ASINF_Addr2 = r34
+ASINF_GR_1by2 = r35
+
+ASINF_GR_3by2 = r36
+ASINF_GR_5by2 = r37
+// Saved state for the error path
+GR_SAVE_B0 = r38
+GR_SAVE_PFS = r39
+GR_SAVE_GP = r40
+// Outgoing arguments for the __libm_error_support call
+GR_Parameter_X = r41
+GR_Parameter_Y = r42
+GR_Parameter_RESULT = r43
+GR_Parameter_TAG = r44
+
+// floating point registers
+
+asinf_y = f32
+asinf_abs_x = f33
+asinf_x2 = f34
+asinf_sgn_x = f35
+
+asinf_1by2 = f36
+asinf_3by2 = f37
+asinf_5by2 = f38
+asinf_coeff_P3 = f39
+asinf_coeff_P8 = f40
+
+asinf_coeff_P1 = f41
+asinf_coeff_P4 = f42
+asinf_coeff_P5 = f43
+asinf_coeff_P2 = f44
+asinf_coeff_P7 = f45
+
+asinf_coeff_P6 = f46
+asinf_coeff_P9 = f47
+asinf_x2 = f48 // NOTE(review): asinf_x2 was already assigned f34 above; confirm which binding the assembler keeps
+asinf_x3 = f49
+asinf_x4 = f50
+
+asinf_x8 = f51
+asinf_x5 = f52
+asinf_const_piby2 = f53
+asinf_const_sqrt2by2 = f54
+asinf_x11 = f55
+
+asinf_poly_p1 = f56
+asinf_poly_p3 = f57
+asinf_sinf1 = f58
+asinf_poly_p2 = f59
+asinf_poly_Ax = f60
+
+asinf_poly_p7 = f61
+asinf_poly_p5 = f62
+asinf_sgnx_t4 = f63
+asinf_poly_Bx = f64
+asinf_t = f65
+// Working values for the square-root refinement on the |x| > sqrt(2)/2 path
+asinf_yby2 = f66
+asinf_B = f67
+asinf_B2 = f68
+asinf_Az = f69
+asinf_dz = f70
+
+asinf_Sz = f71
+asinf_d2z = f72
+asinf_Fz = f73
+asinf_z = f74
+asinf_sgnx_z = f75
+// Partial sums of the polynomial in t = 1 - x*x
+asinf_t2 = f76
+asinf_2poly_p4 = f77
+asinf_2poly_p6 = f78
+asinf_2poly_p1 = f79
+asinf_2poly_p2 = f80
+
+asinf_2poly_p8 = f81
+asinf_t4 = f82
+asinf_Pt = f83
+asinf_sgnx_2poly_p2 = f84
+asinf_sgn_x_piby2 = f85
+
+asinf_poly_p7a = f86
+asinf_2poly_p4a = f87
+asinf_2poly_p4b = f88
+asinf_2poly_p2a = f89
+asinf_poly_p1a = f90
+
+
+
+
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+// Each data8 is one 8-byte double bit pattern; asinf reads them in pairs with ldfpd, so order must match the ASINF_Addr1 load sequence
+asinf_coeff_1_table:
+ASM_TYPE_DIRECTIVE(asinf_coeff_1_table,@object)
+data8 0x3FC5555607DCF816 // P1
+data8 0x3F9CF81AD9BAB2C6 // P4
+data8 0x3FC59E0975074DF3 // P7
+data8 0xBFA6F4CC2780AA1D // P6
+data8 0x3FC2DD45292E93CB // P9
+data8 0x3fe6a09e667f3bcd // sqrt(2)/2
+ASM_SIZE_DIRECTIVE(asinf_coeff_1_table)
+// Second table: two ldfpd pairs followed by a single ldfd for pi/2 through ASINF_Addr2
+asinf_coeff_2_table:
+ASM_TYPE_DIRECTIVE(asinf_coeff_2_table,@object)
+data8 0x3FA6F108E31EFBA6 // P3
+data8 0xBFCA31BF175D82A0 // P8
+data8 0x3FA30C0337F6418B // P5
+data8 0x3FB332C9266CB1F9 // P2
+data8 0x3ff921fb54442d18 // pi_by_2
+ASM_SIZE_DIRECTIVE(asinf_coeff_2_table)
+
+
+.align 32
+.global asinf
+
+.section .text
+.proc asinf
+.align 32
+// asinf: f8 holds x on entry and the single-precision result on return; NaN, zero, |x|=1 and |x|>1 take early exits.
+asinf:
+
+// Load the addresses of the two tables.
+// Then, load the coefficients and other constants.
+
+{ .mfi
+ alloc r32 = ar.pfs,1,8,4,0
+ fnma.s1 asinf_t = f8,f8,f1 // t = 1 - x*x
+ dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000 = 0.5f
+}
+{ .mfi
+ addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp
+ fma.s1 asinf_x2 = f8,f8,f0 // x*x
+ addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;;
+}
+
+
+{ .mfi
+ ld8 ASINF_Addr1 = [ASINF_Addr1]
+ fmerge.s asinf_abs_x = f1,f8 // |x|: sign of 1.0, magnitude of x
+ dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 = 1.5f
+}
+{ .mlx
+ nop.m 999
+ movl ASINF_GR_5by2 = 0x40200000;; // 2.5f
+}
+
+
+
+{ .mfi
+ setf.s asinf_1by2 = ASINF_GR_1by2
+ fmerge.s asinf_sgn_x = f8,f1 // 1.0 carrying the sign of x
+ nop.i 999
+}
+{ .mfi
+ ld8 ASINF_Addr2 = [ASINF_Addr2]
+ nop.f 0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ setf.s asinf_5by2 = ASINF_GR_5by2
+ fcmp.lt.s1 p11,p12 = f8,f0 // NOTE(review): p11/p12 do not appear to be read later in this procedure
+ nop.i 999;;
+}
+
+{ .mmf
+ ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16
+ setf.s asinf_3by2 = ASINF_GR_3by2
+ fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
+}
+
+
+{ .mfi
+ ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16
+ fma.s1 asinf_t2 = asinf_t,asinf_t,f0
+ nop.i 999
+}
+{ .mfi
+ ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16
+ fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1]
+ fclass.m.unc p10,p0 = f8, 0x07 //@zero
+ nop.i 999
+}
+{ .mfi
+ ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16
+ fma.s1 asinf_x3 = f8,asinf_x2,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfd asinf_const_piby2 = [ASINF_Addr2]
+ frsqrta.s1 asinf_B,p0 = asinf_t // B: reciprocal square root approximation of t
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p8) fma.s f8 = f8,f1,f0
+(p8) br.ret.spnt b0 ;; // Exit if x=nan
+}
+
+
+{ .mfb
+ nop.m 999
+ fcmp.eq.s1 p6,p0 = asinf_abs_x,f1 // p6: |x| = 1
+(p10) br.ret.spnt b0 ;; // Exit if x=0
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.gt.s1 p9,p0 = asinf_abs_x,f1 // p9: |x| > 1
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0
+(p6) br.cond.spnt L(ASINF_ABS_ONE) ;; // Branch if |x|=1
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0
+ nop.i 999
+}
+{ .mfb
+(p9) mov GR_Parameter_TAG = 62 // tag handed to __libm_error_support on the |x| > 1 path
+ fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0 // t/2
+(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_Az = asinf_t,asinf_B,f0 // Az = t*B, first estimate of sqrt(t)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_B2 = asinf_B,asinf_B,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2 // dz = 1/2 - B*B*t/2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8
+ nop.i 999;;
+}
+
+
+// Get the absolute value of x and determine the region in which x lies
+
+{ .mfi
+ nop.m 999
+ fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2 // p7: |x| <= sqrt(2)/2, p8: otherwise
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p2a = asinf_2poly_p2,asinf_t2,asinf_2poly_p1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2 // Sz = 1.5 + 2.5*dz
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz // Fz = dz + dz*dz*Sz
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2 // sign(x) * P(t)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az // z = Az*(1 + Fz), refined sqrt(t)
+ nop.i 999;;
+}
+
+.pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
+{ .mfi
+ nop.m 999
+(p8) fnma.s f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2 // sign(x)*pi/2 - z*Pt
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax // x11*Bx + Ax
+ br.ret.sptk b0 ;;
+}
+
+L(ASINF_ABS_ONE):
+// Here for short exit if |x|=1
+{ .mfb
+ nop.m 999
+ fma.s f8 = asinf_sgn_x,asinf_const_piby2,f0 // pi/2 with the sign of x
+ br.ret.sptk b0
+}
+;;
+
+.endp asinf
+ASM_SIZE_DIRECTIVE(asinf)
+
+// Stack operations when calling error support.
+// (1) (2)
+// sp -> + psp -> +
+// | |
+// | | <- GR_Y
+// | |
+// | <-GR_Y Y2->|
+// | |
+// | | <- GR_X
+// | |
+// sp-64 -> + sp -> +
+// save ar.pfs save b0
+// save gp
+
+
+// Stack operations when calling error support.
+// (3) (call) (4)
+// psp -> + sp -> +
+// | |
+// R3 ->| <- GR_RESULT | -> f8
+// | |
+// Y2 ->| <- GR_Y |
+// | |
+// X1 ->| |
+// | |
+// sp -> + +
+// restore gp
+// restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region: // Reached from asinf when |x| > 1: builds the argument slots and calls __libm_error_support
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mfi
+ nop.m 0
+ frcpa.s0 f9,p0 = f0,f0 // f9 = result of 0/0 in status field 0, stored below as the default result
+ nop.i 0
+};;
+
+{ .mib
+ stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-form the result address after the call
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+// External error handler called above
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S
new file mode 100644
index 0000000..32bf4af
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_asinl.S
@@ -0,0 +1,777 @@
+.file "asinl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// API
+//==============================================================
+// long double = asinl(long double)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+//
+// predicate registers used:
+// p6 -> p12
+//
+// floating-point registers used:
+// f8 has input, then output
+// f8 -> f13, f32 -> f88 (plus the constant registers f0 and f1)
+//
+// general registers used:
+// r32 -> r47
+//
+// Overview of operation
+//==============================================================
+// There are three paths
+// 1. |x| < 2^-40 ASIN_TINY
+// 2. 2^-40 <= |x| < 1/4 ASIN_POLY
+// 3. 1/4 <= |x| < 1 ASIN_ATAN
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+FR_RESULT = f10 // stored as Parameter 3 in __libm_error_region
+FR_X = f8 // input x, stored as Parameter 1
+FR_Y = f1 // stored as Parameter 2
+asin_P79 = f32 // odd-coefficient Horner chain in x^4: P79 -> P59 -> P39 -> P19
+asin_P59 = f33
+asin_P39 = f34
+asin_P19 = f35
+// Even-coefficient Horner chain in x^4: P810 -> P610 -> P410 -> P210
+asin_P810 = f36
+asin_P610 = f37
+asin_P410 = f38
+asin_P210 = f39
+// Polynomial coefficients, loaded from asin_coefficients
+asin_A1 = f41
+asin_A2 = f42
+asin_A3 = f43
+asin_A4 = f44
+asin_A5 = f45
+asin_A6 = f46
+asin_A7 = f47
+asin_A8 = f48
+asin_A9 = f49
+asin_A10 = f50
+// x^2 and x^4 for the polynomial path
+asin_X2 = f51
+asin_X4 = f52
+// 1 - X*X is held as B + b (asin_B + asin_Bb); C, Cc and D are intermediates
+asin_B = f53
+asin_Bb = f54
+asin_C = f55
+asin_Cc = f56
+asin_D = f57
+// sqrt(B + b) is held as W + w (asin_W + asin_Ww)
+asin_W = f58
+asin_Ww = f59
+// frsqrta seed (y0) and its two refinements (y1, y2)
+asin_y0 = f60
+asin_y1 = f61
+asin_y2 = f62
+// H = y2 * HALF, Hh = HALF * B
+asin_H = f63
+asin_Hh = f64
+// Scratch temporaries of the square-root iteration
+asin_t1 = f65
+asin_t2 = f66
+asin_t3 = f67
+asin_t4 = f68
+asin_t5 = f69
+
+asin_Pseries = f70 // not referenced elsewhere in this file
+asin_NORM_f8 = f71 // fnorm(x)
+asin_ABS_NORM_f8 = f72 // |fnorm(x)|
+
+asin_2m100 = f73 // 2^-100
+asin_P1P2 = f74 // accumulator of the polynomial path
+asin_HALF = f75 // built with setf.exp from 0xfffe
+asin_1mD = f76 // 1 - D
+
+asin_1mB = f77 // 1 - B
+asin_1mBmC = f78 // (1 - B) - C
+asin_S = f79 // B * y2
+
+asin_BmWW = f80 // B - W*W
+asin_BmWWpb = f81 // (B - W*W) + b
+asin_2W = f82 // W + W
+asin_1d2W = f83 // frcpa approximation of 1/(2W)
+asin_Dd = f84
+
+asin_XWw = f85 // |x| * w, the correction term "corr"
+asin_low = f86 // s_lo * Z_lo - corr
+
+asin_pi_by_2 = f87
+asin_pi_by_2_lo = f88
+
+asin_GR_17_ones = r33
+asin_GR_16_ones = r34
+asin_GR_signexp_f8 = r35
+asin_GR_exp = r36
+asin_GR_true_exp = r37
+asin_GR_ff9b = r38
+
+GR_SAVE_B0 = r39
+GR_SAVE_SP = r40
+GR_SAVE_PFS = r33
+// r33 is shared with asin_GR_17_ones, whose only use precedes the ar.pfs saves.
+// r40 holds the address of the table of coefficients in asinl.
+// GR_SAVE_SP also names r40 but is not referenced anywhere in this file.
+GR_SAVE_GP = r41
+asin_GR_fffe = r42
+asin_GR_retval = r43
+
+GR_Parameter_X = r44
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+GR_Parameter_TAG = r47
+
+
+// 2^-40 (all arithmetic below is in hex):
+// A true exponent of -40 is
+// : -40 + register_bias
+// : -0x28 + 0xffff = 0xffd7
+
+// A true exponent of -100 is
+// : -100 + register_bias
+// : -0x64 + 0xffff = 0xff9b
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+// Each entry is two data8 words (16 bytes) and is read by asinl with "ldfe ...,16".
+.align 16
+// Entry order must match the load order in asinl: A10 down to A1, then pi_by_2_hi, pi_by_2_lo.
+asin_coefficients:
+ASM_TYPE_DIRECTIVE(asin_coefficients,@object)
+data8 0xBB08911F2013961E, 0x00003FF8 // A10
+data8 0x981F1095A23A87D3, 0x00003FF8 // A9
+data8 0xBDF09C6C4177BCC6, 0x00003FF8 // A8
+data8 0xE4C3A60B049ACCEA, 0x00003FF8 // A7
+data8 0x8E2789F4E8A8F1AD, 0x00003FF9 // A6
+data8 0xB745D09B2B0E850B, 0x00003FF9 // A5
+data8 0xF8E38E3BC4C50920, 0x00003FF9 // A4
+data8 0xB6DB6DB6D89FCD81, 0x00003FFA // A3
+data8 0x99999999999AF376, 0x00003FFB // A2
+data8 0xAAAAAAAAAAAAAA71, 0x00003FFC // A1
+// pi/2 split into a high part and a low correction; used only for x = +-1
+data8 0xc90fdaa22168c234, 0x00003FFF // pi_by_2_hi
+data8 0xc4c6628b80dc1cd1, 0x00003FBF // pi_by_2_lo
+ASM_SIZE_DIRECTIVE(asin_coefficients)
+
+.align 32
+.global asinl#
+
+.section .text
+.proc asinl#
+.align 32
+// asinl: long double arcsine; input x in f8, result in f8.
+// |x| >= 1 branches to ASIN_ERROR_RETURN; the case-3 path falls through into __libm_callout.
+asinl:
+
+{ .mfi
+ alloc r32 = ar.pfs,1,11,4,0 // 1 input, 11 locals, 4 outputs, 0 rotating
+(p0) fnorm asin_NORM_f8 = f8
+(p0) mov asin_GR_17_ones = 0x1ffff // mask for the 17-bit exponent field
+}
+
+{ .mii
+(p0) mov asin_GR_16_ones = 0xffff // register exponent bias
+(p0) mov asin_GR_ff9b = 0xff9b ;; // biased exponent of 2^-100
+ nop.i 999
+}
+
+
+{ .mmi
+(p0) setf.exp asin_2m100 = asin_GR_ff9b // asin_2m100 = 2^-100
+(p0) addl r40 = @ltoff(asin_coefficients), gp // address of the table's linkage-table entry
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r40 = [r40] // r40 = address of asin_coefficients
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+
+// Load the constants (table order A10 ... A1, pi_by_2_hi; r40 is left pointing at pi_by_2_lo)
+
+{ .mmi
+(p0) ldfe asin_A10 = [r40],16 ;;
+(p0) ldfe asin_A9 = [r40],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe asin_A8 = [r40],16 ;;
+(p0) ldfe asin_A7 = [r40],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe asin_A6 = [r40],16 ;;
+(p0) getf.exp asin_GR_signexp_f8 = asin_NORM_f8 // sign and biased exponent of normalized x
+ nop.i 999
+}
+
+{ .mmi
+(p0) ldfe asin_A5 = [r40],16 ;;
+(p0) ldfe asin_A4 = [r40],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s asin_ABS_NORM_f8 = f0, asin_NORM_f8 // |x|: sign taken from f0
+(p0) and asin_GR_exp = asin_GR_signexp_f8, asin_GR_17_ones ;; // drop the sign bit
+}
+
+// case 1: |x| < 2^-40 ==> p6 (includes x = +-0)
+// case 2: 2^-40 <= |x| < 2^-2 ==> p8
+// case 3: 2^-2 <= |x| < 1 ==> p9
+// case 4: 1 <= |x| ==> p11
+// In case 4, we pick up the special case x = +-1 and return +-pi/2
+
+{ .mii
+(p0) ldfe asin_A3 = [r40],16
+(p0) sub asin_GR_true_exp = asin_GR_exp, asin_GR_16_ones ;; // unbiased exponent
+(p0) cmp.ge.unc p6, p7 = -41, asin_GR_true_exp ;; // p6: exponent <= -41
+}
+
+{ .mii
+(p0) ldfe asin_A2 = [r40],16
+(p7) cmp.ge.unc p8, p9 = -3, asin_GR_true_exp ;; // p8: exponent <= -3
+(p9) cmp.ge.unc p10, p11 = -1, asin_GR_true_exp // p11: exponent >= 0
+}
+
+{ .mmi
+(p0) ldfe asin_A1 = [r40],16 ;;
+(p0) ldfe asin_pi_by_2 = [r40],16
+ nop.i 999
+}
+
+// case 4: |x| >= 1
+{ .mib
+ nop.m 999
+ nop.i 999
+(p11) br.spnt L(ASIN_ERROR_RETURN) ;;
+}
+
+// case 1: |x| < 2^-40
+{ .mfb
+ nop.m 999
+(p6) fma.s0 f8 = asin_2m100,f8,f8 // x + 2^-100 * x
+(p6) br.ret.spnt b0 ;;
+}
+
+
+// case 2: 2^-40 <= |x| < 2^-2 ==> p8
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_X2 = f8,f8, f0 // x^2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_X4 = asin_X2,asin_X2, f0 // x^4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P810 = asin_X4, asin_A10, asin_A8 // even chain: A10 .. A2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P79 = asin_X4, asin_A9, asin_A7 // odd chain: A9 .. A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P610 = asin_X4, asin_P810, asin_A6
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P59 = asin_X4, asin_P79, asin_A5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P410 = asin_X4, asin_P610, asin_A4
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P39 = asin_X4, asin_P59, asin_A3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P210 = asin_X4, asin_P410, asin_A2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P19 = asin_X4, asin_P39, asin_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P1P2 = asin_X2, asin_P210, asin_P19 // merge the two chains
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asin_P1P2 = asin_X2, asin_P1P2, f0 // times x^2
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fma.s0 f8 = asin_NORM_f8, asin_P1P2, asin_NORM_f8 // result = x + x * P1P2
+(p8) br.ret.spnt b0 ;;
+}
+
+// case 3: 2^-2 <= |x| < 1
+// 1- X*X is computed as B + b
+// Step 1.1: Get B and b
+
+// atan2 will return
+// f8 = Z_hi
+// f10 = Z_lo
+// f11 = s_lo
+
+
+{ .mfi
+(p0) mov asin_GR_fffe = 0xfffe
+(p0) fmerge.se f8 = asin_ABS_NORM_f8, asin_ABS_NORM_f8 // f8 = |x|
+nop.i 0
+};;
+
+{ .mmf
+nop.m 0
+(p0) setf.exp asin_HALF = asin_GR_fffe
+(p0) fmerge.se f12 = asin_NORM_f8, asin_NORM_f8 ;; // f12 = normalized x, sign kept for the final fmerge.s
+}
+
+// p6: |x| < HALF, p7: |x| >= HALF. The two branches build B and b differently.
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p6,p7 = asin_ABS_NORM_f8, asin_HALF
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 asin_D = f1,f1,asin_ABS_NORM_f8 // D = 1 + |x|
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fms.s1 asin_C = f1,f1,asin_ABS_NORM_f8 // C = 1 - |x|
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 asin_B = asin_C, asin_D, f0 // B = C * D
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fms.s1 asin_1mD = f1,f1,asin_D // 1 - D
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 asin_Dd = asin_1mD,f1, asin_ABS_NORM_f8 // Dd = (1 - D) + |x|
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fms.s1 asin_Bb = asin_C, asin_D, asin_B // Bb = C*D - B
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 asin_Bb = asin_C, asin_Dd, asin_Bb // Bb += C * Dd
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 asin_C = asin_ABS_NORM_f8, asin_ABS_NORM_f8, f0 // C = x^2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 asin_B = f1, f1, asin_C // B = 1 - C
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 asin_Cc = asin_ABS_NORM_f8, asin_ABS_NORM_f8, asin_C // Cc = x*x - C
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_Hh = asin_HALF, asin_B, f0 // Hh = HALF * B
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 asin_1mB = f1, f1, asin_B // 1 - B
+ nop.i 999 ;;
+}
+
+// Step 1.2:
+// sqrt(B + b) is computed as W + w
+// Get W
+
+{ .mfi
+ nop.m 999
+(p0) frsqrta.s1 asin_y0,p8 = asin_B // y0 = reciprocal square root approximation of B
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 asin_1mBmC = asin_1mB, f1, asin_C // (1 - B) - C
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_t1 = asin_y0, asin_y0, f0 // t1 = y0^2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 asin_Bb = asin_1mBmC, f1, asin_Cc // Bb = ((1 - B) - C) - Cc
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 asin_t2 = asin_t1, asin_Hh, asin_HALF // t2 = HALF - t1 * Hh
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_y1 = asin_t2, asin_y0, asin_y0 // y1 = y0 + t2 * y0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_t3 = asin_y1, asin_Hh, f0 // t3 = y1 * Hh
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 asin_t4 = asin_t3, asin_y1, asin_HALF // t4 = HALF - t3 * y1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_y2 = asin_t4, asin_y1, asin_y1 // y2 = y1 + t4 * y1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_S = asin_B, asin_y2, f0 // S = B * y2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_H = asin_y2, asin_HALF, f0 // H = y2 * HALF
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_t5 = asin_Hh, asin_y2, f0 // t5 is not read again in this file
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 asin_Dd = asin_S, asin_S, asin_B // Dd = B - S*S
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_W = asin_Dd, asin_H, asin_S // W = S + Dd * H
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_2W = asin_W, f1, asin_W // 2W
+ nop.i 999
+}
+
+// Step 1.3
+// Get w
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 asin_BmWW = asin_W, asin_W, asin_B // B - W*W
+ nop.i 999 ;;
+}
+
+// Step 2
+// asin(x) = atan2(X,sqrt(1-X*X))
+// = atan2(X, W) -Xw
+// corr = Xw
+// asin(x) = Z_hi + (s_lo*Z_lo - corr)
+// Call atan2(X, W)
+// Save W in f9
+// Save X in f12
+// Save w in f13
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.se f9 = asin_W, asin_W // f9 = W
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_BmWWpb = asin_BmWW, f1, asin_Bb // (B - W*W) + b
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) frcpa.s1 asin_1d2W,p9 = f1, asin_2W // approximation of 1/(2W)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 asin_Ww = asin_BmWWpb, asin_1d2W, f0 // w = ((B - W*W) + b) * 1/(2W)
+ nop.i 999 ;;
+}
+.endp asinl
+ASM_SIZE_DIRECTIVE(asinl)
+
+.proc __libm_callout
+__libm_callout: // Entered by fall-through from asinl: calls __libm_atan2_reg, then applies the correction and the sign
+.prologue
+{ .mfi
+ nop.m 0
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+};;
+{ .mfi
+ mov GR_SAVE_GP=gp // Save gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+}
+.body
+{.mfb
+ nop.m 0
+(p0) fmerge.se f13 = asin_Ww, asin_Ww // f13 = w; f8 = |x| and f9 = W were set in asinl
+(p0) br.call.sptk.many b0=__libm_atan2_reg#
+};;
+{ .mfi
+ mov gp = GR_SAVE_GP // Restore gp
+(p0) fma.s1 asin_XWw = asin_ABS_NORM_f8,f13,f0 // assumes the callee leaves f12, f13 and asin_ABS_NORM_f8 intact -- TODO confirm
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+// asin_XWw = Xw = corr
+// asin_low = (s_lo * Z_lo - corr)
+// f8 = Z_hi + (s_lo * Z_lo - corr)
+// On return f8 = Z_hi, f10 = Z_lo, f11 = s_lo (per the note in asinl).
+{ .mfi
+ nop.m 999
+(p0) fms.s1 asin_low = f11, f10, asin_XWw
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+};;
+
+{ .mfi
+ nop.m 999
+(p0) fma.s0 f8 = f8, f1, asin_low
+ nop.i 999 ;;
+}
+// The computation ran on |x|; give the result the sign of the original x (saved in f12).
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f12,f8
+(p0) br.ret.sptk b0 ;;
+}
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+
+.proc SPECIAL
+SPECIAL:
+L(ASIN_ERROR_RETURN):
+// Reached from asinl when |x| >= 1 (p11).
+// If X is 1, return (sign of X)pi/2
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc p6,p7 = asin_ABS_NORM_f8,f1 // p6: |x| == 1
+ nop.i 999 ;;
+}
+
+{ .mfb
+(p6) ldfe asin_pi_by_2_lo = [r40] // r40 was left pointing at pi_by_2_lo by the loads in asinl
+(p6) fmerge.s asin_pi_by_2 = f8,asin_pi_by_2 // give pi_by_2 the sign of x
+ nop.b 0;;
+}
+
+// |X| == 1 returns x * pi_by_2_lo + signed pi_by_2. Otherwise, if X is a NAN, leave
+// qnan snan inf norm unorm 0 -+
+// 1 1 0 0 0 0 11 (fclass mask 0xc3)
+{ .mfb
+ nop.m 999
+(p6) fma.s0 f8 = f8,asin_pi_by_2_lo,asin_pi_by_2
+(p6) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p12,p0 = f8, 0xc3 // p12: x is a NaN
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) fma.s0 f8 = f8,f1,f0 // return x * 1 + 0
+(p12) br.ret.spnt b0 ;;
+}
+{ .mfi
+(p0) mov GR_Parameter_TAG = 60 // error tag passed on to the error region
+(p0) frcpa f10, p6 = f0, f0 // f10 (FR_RESULT) = frcpa(0/0); falls through into __libm_error_region
+nop.i 0
+};;
+.endp SPECIAL
+ASM_SIZE_DIRECTIVE(SPECIAL)
+
+.proc __libm_error_region
+__libm_error_region: // Entered by fall-through from SPECIAL: spill FR_X, FR_Y, FR_RESULT and call __libm_error_support
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32; pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 at sp+16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-set GR_Parameter_RESULT, which the call may have overwritten
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
+
+.type __libm_atan2_reg#,@function
+.global __libm_atan2_reg#
diff --git a/sysdeps/ia64/fpu/e_atan2.S b/sysdeps/ia64/fpu/e_atan2.S
new file mode 100644
index 0000000..6d6b11b
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atan2.S
@@ -0,0 +1,1124 @@
+.file "atan2.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 8/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 9/28/00 Updated to set invalid on SNaN inputs
+// 1/19/01 Fixed flags for small results
+//
+// API
+//==============================================================
+// double atan2(double Y, double X)
+//
+// Overview of operation
+//==============================================================
+//
+// There are two basic paths: swap true and swap false.
+// atan2(Y,X) ==> atan(V/U) where |U| >= |V|. If |Y| > |X|, we must swap.
+//
+// p6 swap True |Y| > |X|
+// p7 swap False |Y| <= |X|
+// p8 X+ (If swap=True p8=p9=0)
+// p9 X-
+//
+// all the other predicates p10 thru p15 are false for the main path
+//
+// Simple trigonometric identities show
+// Region 1 (-45 to +45 degrees):
+// X>0, |Y|<=X, V=Y, U=X atan2(Y,X) = sgnY * (0 + atan(V/U))
+//
+// Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
+// X>0, |Y|>X, V=X, U=Y atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
+//
+// Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
+// X<0, |Y|>|X|, V=X, U=Y atan2(Y,X) = sgnY * (pi/2 + atan(V/U))
+//
+// Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
+// X<0, |Y|<=|X|, V=Y, U=X atan2(Y,X) = sgnY * (pi - atan(V/U))
+//
+// So the result is always of the form atan2(Y,X) = P + sgnXY * atan(V/U)
+//
+// We compute atan(V/U) from the identity
+// atan(z) + atan([(V/U)-z] / [1+(V/U)z])
+// where z is a limited precision approximation (16 bits) to V/U
+//
+// z is calculated with the assistance of the frcpa instruction.
+//
+// atan(z) is calculated by a polynomial z + z^3 * p(w), w=z^2
+// where p(w) = P0+P1*w+...+P22*w^22
+//
+// Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z)
+//
+// Approximate atan(d) by d + P0*d^3
+// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
+// Compute q(a) = 1 + a + ... + a^5.
+// Then F*q(a) approximates the reciprocal to more than 50 bits.
+
+// Special values
+//==============================================================
+// Y x Result
+// +number +inf +0
+// -number +inf -0
+// +number -inf +pi
+// -number -inf -pi
+//
+// +inf +number +pi/2
+// -inf +number -pi/2
+// +inf -number +pi/2
+// -inf -number -pi/2
+//
+// +inf +inf +pi/4
+// -inf +inf -pi/4
+// +inf -inf +3pi/4
+// -inf -inf -3pi/4
+//
+// +1 +1 +pi/4
+// -1 +1 -pi/4
+// +1 -1 +3pi/4
+// -1 -1 -3pi/4
+//
+// +number +0 +pi/2
+// -number +0 -pi/2
+// +number -0 +pi/2
+// -number -0 -pi/2
+//
+// +0 +number +0
+// -0 +number -0
+// +0 -number +pi
+// -0 -number -pi
+//
+// +0 +0 +0
+// -0 +0 -0
+// +0 -0 +pi
+// -0 -0 -pi
+//
+// Nan anything quiet Y
+// anything NaN quiet X
+
+// atan2(+-0/+-0) sets double error tag to 37
+// atan2(+-0/+-0) sets single error tag to 38
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+EXP_AD_P1 = r33 // walking pointer into atan2_tb1
+EXP_AD_P2 = r34 // walking pointer into atan2_tb2
+atan2_GR_sml_exp = r35 // shares r35 with GR_SAVE_B0 below
+
+// Registers used only around the __libm_error_support call
+GR_SAVE_B0 = r35
+GR_SAVE_GP = r36
+GR_SAVE_PFS = r37
+
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+atan2_GR_tag = r41
+
+// Inputs: Y arrives in f8 and X in f9
+atan2_X = f9
+atan2_Y = f8
+
+atan2_u1_X = f32 // frcpa approximation of 1/X
+atan2_u1_Y = f33 // frcpa approximation of 1/Y
+atan2_Umax = f34 // famax(Y,X), later its absolute value
+atan2_Vmin = f35 // famin(Y,X), later its absolute value
+atan2_two = f36
+atan2_absX = f37
+atan2_z1_X = f38
+atan2_z1_Y = f39
+atan2_B1X = f40
+atan2_B1Y = f41
+atan2_wp = f42
+atan2_B1sq = f43
+atan2_z = f44
+atan2_w = f45
+// Polynomial coefficients P0 .. P22, loaded from atan2_tb1 and atan2_tb2
+atan2_P0 = f46
+atan2_P1 = f47
+atan2_P2 = f48
+atan2_P3 = f49
+atan2_P4 = f50
+atan2_P5 = f51
+atan2_P6 = f52
+atan2_P7 = f53
+atan2_P8 = f54
+atan2_P9 = f55
+atan2_P10 = f56
+atan2_P11 = f57
+atan2_P12 = f58
+atan2_P13 = f59
+atan2_P14 = f60
+atan2_P15 = f61
+atan2_P16 = f62
+atan2_P17 = f63
+atan2_P18 = f64
+atan2_P19 = f65
+atan2_P20 = f66
+atan2_P21 = f67
+atan2_P22 = f68
+atan2_Pi_by_2 = f69
+// Partial sums of the polynomial evaluation (V* and W* families) and helpers
+atan2_V13 = f70
+atan2_W11 = f71
+atan2_E = f72
+atan2_gamma = f73
+atan2_V11 = f74
+atan2_V12 = f75
+atan2_V7 = f76
+atan2_V8 = f77
+atan2_W7 = f78
+atan2_W8 = f79
+atan2_W3 = f80
+atan2_W4 = f81
+atan2_V3 = f82
+atan2_V4 = f83
+atan2_F = f84
+atan2_gV = f85
+atan2_V10 = f86
+atan2_zcub = f87
+atan2_V6 = f88
+atan2_V9 = f89
+atan2_W10 = f90
+atan2_W6 = f91
+atan2_W2 = f92
+atan2_V2 = f93
+
+atan2_alpha = f94
+atan2_alpha_1 = f95
+atan2_gVF = f96
+atan2_V5 = f97
+atan2_W12 = f98
+atan2_W5 = f99
+atan2_alpha_sq = f100
+atan2_Cp = f101
+atan2_V1 = f102
+// The next two names share f103
+atan2_sml_norm = f103
+atan2_FR_tmp = f103
+
+atan2_W1 = f104
+atan2_alpha_cub = f105
+atan2_C = f106
+atan2_P = f107
+atan2_d = f108
+atan2_A_hi = f109
+atan2_dsq = f110
+atan2_pd = f111
+atan2_A_lo = f112
+atan2_A = f113
+
+atan2_Pp = f114
+
+atan2_sgnY = f116
+atan2_pi = f117
+atan2_sgnX = f118
+atan2_sgnXY = f119
+
+atan2_3pi_by_4 = f120
+atan2_pi_by_4 = f121
+
+//atan2_sF = p7
+//atan2_sT = p6
+
+// These coefficients are for atan2.
+// You can also use this set to substitute those used in the |X| <= 1 case for atan;
+// BUT NOT vice versa.
+
+/////////////////////////////////////////////////////////////
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+// Each entry is 16 bytes and is read with "ldfe ...,16"; entry order must match the load order in atan2.
+.align 16
+// atan2_tb1 is walked through EXP_AD_P1.
+atan2_tb1:
+ASM_TYPE_DIRECTIVE(atan2_tb1,@object)
+data8 0xB199DD6D2675C40F , 0x0000BFFA // P10
+data8 0xA21922DC45605EA1 , 0x00003FFA // P11
+data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
+data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9
+data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
+data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7
+data8 0xF396268151CFB11C , 0x00003FF7 // P17
+data8 0x9D3436AABE218776 , 0x00003FF5 // P19
+data8 0x80D601879218B53A , 0x00003FFA // P13
+data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
+data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
+data8 0xE38E38E320A8A098 , 0x00003FFB // P3
+data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22
+data8 0xC90FDAA22168C235 , 0x00003FFE // pi/4
+ASM_SIZE_DIRECTIVE(atan2_tb1)
+// atan2_tb2 is walked through EXP_AD_P2.
+atan2_tb2:
+ASM_TYPE_DIRECTIVE(atan2_tb2,@object)
+data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
+data8 0xCE585A259BD8374C , 0x00003FF0 // P21
+data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
+data8 0x88887EBB209E3543 , 0x0000BFFB // P6
+data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
+data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18
+data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
+data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14
+data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
+data8 0x9249249247E37913 , 0x0000BFFC // P2
+data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2
+data8 0xC90FDAA22168C235 , 0x00004000 // pi
+data8 0x96cbe3f9990e91a8 , 0x00004000 // 3pi/4
+ASM_SIZE_DIRECTIVE(atan2_tb2)
+
+
+
+
+.align 32
+.global atan2#
+#ifdef _LIBC
+.global __atan2#
+.global __ieee754_atan2#
+#endif
+
+////////////////////////////////////////////////////////
+
+.section .text
+.align 32
+// atan2(Y,X): Y in f8, X in f9, result in f8. Under _LIBC, __atan2 and __ieee754_atan2 label the same code.
+.proc atan2#
+atan2:
+#ifdef _LIBC
+.proc __atan2#
+__atan2:
+.proc __ieee754_atan2#
+__ieee754_atan2:
+#endif
+// qnan snan inf norm unorm 0 -+
+// 0 0 1 0 0 0 11 = fclass mask 0x23 (infinity tests below); the NaN tests use 0xc3
+
+
+// Y NAN? p10 p11
+// p10 ==> quiet Y and return
+// p11 X NAN? p12, p13
+// p12 ==> quiet X and return
+
+{ .mfi
+ alloc r32 = ar.pfs,1,5,4,0 // 1 input, 5 locals, 4 outputs, 0 rotating
+ frcpa.s1 atan2_u1_X,p6 = f1,atan2_X // u1_X = approximation of 1/X
+ addl EXP_AD_P2 = @ltoff(atan2_tb2), gp
+}
+{ .mfi
+ addl EXP_AD_P1 = @ltoff(atan2_tb1), gp
+ fclass.m.unc p10,p11 = f8, 0xc3 // p10: Y is a NaN
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ld8 EXP_AD_P1 = [EXP_AD_P1]
+ frcpa.s1 atan2_u1_Y,p7 = f1,atan2_Y // u1_Y = approximation of 1/Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_two = f1,f1,f1 // two = 1*1 + 1
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ld8 EXP_AD_P2 = [ EXP_AD_P2]
+ famax.s1 atan2_Umax = f8,f9 // operand of larger magnitude
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmerge.s atan2_absX = f0,atan2_X // |X| (atan2_absX is not read again in this file)
+ nop.i 999
+}
+;;
+
+// p10 Y NAN, quiet and return
+{ .mfi
+ ldfe atan2_P10 = [EXP_AD_P1],16
+ fmerge.s atan2_sgnY = atan2_Y,f1 // +-1 carrying the sign of Y
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p10) fma.d f8 = f8,f9,f0
+(p10) br.ret.spnt b0
+;;
+}
+
+
+{ .mmf
+ ldfe atan2_P11 = [EXP_AD_P1],16
+ ldfe atan2_P20 = [EXP_AD_P2],16
+ fmerge.s atan2_sgnX = atan2_X,f1 // +-1 carrying the sign of X
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P8 = [EXP_AD_P1],16
+ fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0 // z1_X = u1_X * Y
+ nop.i 999
+}
+{ .mfi
+
+ ldfe atan2_P21 = [EXP_AD_P2],16
+ fma.s1 atan2_z1_Y = atan2_u1_Y, atan2_X, f0 // z1_Y = u1_Y * X
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ldfe atan2_P9 = [EXP_AD_P1],16
+ fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two // B1X = 2 - u1_X * X
+ nop.i 999
+}
+{ .mfi
+
+ ldfe atan2_P4 = [EXP_AD_P2],16
+ fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two // B1Y = 2 - u1_Y * Y
+ nop.i 999
+;;
+}
+
+// p6 (atan2_sT) true if swap
+// p7 (atan2_sF) true if no swap
+// p11 ==> Y !NAN; X NAN?
+
+{ .mfi
+ ldfe atan2_P5 = [EXP_AD_P1],16
+// fcmp.eq.unc.s1 atan2_sF,atan2_sT = atan2_Umax, atan2_X
+ fcmp.eq.unc.s1 p7,p6 = atan2_Umax, atan2_X
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P6 = [EXP_AD_P2],16
+(p11) fclass.m.unc p12,p13 = f9, 0xc3
+ nop.i 999
+;;
+}
+
+{ .mmf
+ ldfe atan2_P7 = [EXP_AD_P1],16
+ ldfe atan2_P16 = [EXP_AD_P2],16
+ famin.s1 atan2_Vmin = f8,f9 // operand of smaller magnitude
+;;
+}
+
+// p8 true if X positive
+// p9 true if X negative
+// both are false if swap is true
+{ .mfi
+ ldfe atan2_P17 = [EXP_AD_P1],16
+//(atan2_sF) fcmp.eq.unc.s1 p8,p9 = atan2_sgnX,f1
+(p7) fcmp.eq.unc.s1 p8,p9 = atan2_sgnX,f1
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P18 = [EXP_AD_P2],16
+ fma.s1 atan2_sgnXY = atan2_sgnX, atan2_sgnY, f0 // sgnXY = sgnX * sgnY
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P19 = [EXP_AD_P1],16
+//(atan2_sF) fma.s1 atan2_wp = atan2_z1_X, atan2_z1_X, f0
+(p7) fma.s1 atan2_wp = atan2_z1_X, atan2_z1_X, f0
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P12 = [EXP_AD_P2],16
+//(atan2_sT) fma.s1 atan2_wp = atan2_z1_Y, atan2_z1_Y, f0
+(p6) fma.s1 atan2_wp = atan2_z1_Y, atan2_z1_Y, f0
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P13 = [EXP_AD_P1],16
+//(atan2_sF) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
+(p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P14 = [EXP_AD_P2],16
+//(atan2_sT) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
+(p6) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P15 = [EXP_AD_P1],16
+//(atan2_sF) fma.s1 atan2_B1sq = atan2_B1X, atan2_B1X, f0
+(p7) fma.s1 atan2_B1sq = atan2_B1X, atan2_B1X, f0
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P0 = [EXP_AD_P2],16
+//(atan2_sT) fma.s1 atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
+(p6) fma.s1 atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
+ nop.i 999
+;;
+}
+
+
+// p12 ==> X NAN, quiet and return
+{ .mfi
+ ldfe atan2_P1 = [EXP_AD_P1],16
+ fmerge.s atan2_Umax = f0,atan2_Umax // Umax = |Umax|
+ nop.i 999
+}
+{ .mfb
+ ldfe atan2_P2 = [EXP_AD_P2],16
+(p12) fma.d f8 = f9,f8,f0
+(p12) br.ret.spnt b0
+;;
+}
+
+
+// p10 ==> x inf y ?
+// p11 ==> x !inf y ?
+{ .mfi
+ ldfe atan2_P3 = [EXP_AD_P1],16
+ fmerge.s atan2_Vmin = f0,atan2_Vmin // Vmin = |Vmin|
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_Pi_by_2 = [EXP_AD_P2],16
+ fclass.m.unc p10,p11 = f9, 0x23
+ nop.i 999
+;;
+}
+
+
+{ .mmf
+ ldfe atan2_P22 = [EXP_AD_P1],16
+ ldfe atan2_pi = [EXP_AD_P2],16
+ nop.f 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p12,p13=f9,f8 // Dummy to catch denormal and invalid
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_pi_by_4 = [EXP_AD_P1],16
+//(atan2_sT) fmerge.ns atan2_sgnXY = atan2_sgnXY, atan2_sgnXY
+(p6) fmerge.ns atan2_sgnXY = atan2_sgnXY, atan2_sgnXY
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_3pi_by_4 = [EXP_AD_P2],16
+ fma.s1 atan2_w = atan2_wp, atan2_B1sq,f0 // w = wp * B1sq
+ nop.i 999
+;;
+}
+
+// p12 ==> x inf y inf
+// p13 ==> x inf y !inf
+{ .mfi
+ nop.m 999
+ fmerge.s atan2_z = f0, atan2_z // z = |z|
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 99
+(p10) fclass.m.unc p12,p13 = f8, 0x23
+ nop.i 999
+}
+{ .mfi
+ nop.m 99
+(p11) fclass.m.unc p14,p15 = f8, 0x23 // p14: X not inf, Y inf
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1 // both inf: p10 if X is +inf, p11 if -inf
+ nop.i 99
+;;
+}
+
+
+{ .mfb
+ mov atan2_GR_sml_exp = 0x1 // Small exponent for making small norm
+(p14) fma.d f8 = atan2_sgnY, atan2_Pi_by_2, f0
+(p14) br.ret.spnt b0
+;;
+}
+
+// Make a very small normal in case need to force inexact and underflow
+{ .mfi
+ setf.exp atan2_sml_norm = atan2_GR_sml_exp
+ fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W11 = atan2_w, atan2_P21, atan2_P20
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_E = atan2_Vmin, atan2_z, atan2_Umax // E = U + V*z
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2_gamma = atan2_Umax, atan2_z, f1
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V12 = atan2_w, atan2_w, f0 // V12 = w^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W4 = atan2_w, atan2_P15, atan2_P14
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_zcub = atan2_z, atan2_w, f0 // zcub = z * w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2_gV = atan2_Umax, atan2_z, atan2_Vmin // gV = V - U*z
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ frcpa.s1 atan2_F,p15 = f1, atan2_E // F = approximation of 1/E
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0 // V9 = w^4
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W6 = atan2_V12, atan2_W8 , atan2_W7
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3
+ nop.i 999
+;;
+}
+
+
+// Both X and Y are INF
+// p10 ==> X +
+// p11 ==> X -
+.pred.rel "mutex",p10,p11
+{ .mfb
+ nop.m 999
+(p10) fma.d f8 = atan2_sgnY, atan2_pi_by_4, f0
+(p10) br.ret.spnt b0
+}
+{ .mfb
+ nop.m 999
+(p11) fma.d f8 = atan2_sgnY, atan2_3pi_by_4, f0
+(p11) br.ret.spnt b0
+;;
+}
+
+
+.pred.rel "mutex",p8,p9,p6
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2_alpha = atan2_E, atan2_F, f1 // alpha = 1 - E*F
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two // alpha_1 = 2 - E*F
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+//(atan2_sT) fmerge.s atan2_P = atan2_Y, atan2_Pi_by_2
+(p6) fmerge.s atan2_P = atan2_Y, atan2_Pi_by_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_gVF = atan2_gV, atan2_F, f0 // gVF = gV * F
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W12 = atan2_V9, atan2_V9, f0
+ nop.i 999
+;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fmerge.s atan2_P = atan2_sgnY, f0 // no swap, X positive: P = 0 with the sign of Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W5 = atan2_V9, atan2_W10, atan2_W6
+ nop.i 999
+;;
+}
+
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fmerge.s atan2_P = atan2_sgnY, atan2_pi // no swap, X negative: P = pi with the sign of Y
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1 // Cp = 1 + alpha * alpha_1
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0
+ nop.i 999
+;;
+}
+
+
+// p13 ==> x inf y !inf
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0 // C = gVF * Cp
+ nop.i 999
+;;
+}
+
+.pred.rel "mutex",p10,p11
+// x inf y !inf
+{ .mfb
+ nop.m 999
+(p10) fmerge.s f8 = atan2_sgnY, f0
+(p10) br.ret.spnt b0
+}
+{ .mfb
+ nop.m 999
+(p11) fma.d f8 = atan2_sgnY, atan2_pi, f0
+(p11) br.ret.spnt b0
+;;
+}
+
+
+
+// p10 ==> y 0 x?
+// p11 ==> y !0 x?
+{ .mfi
+ nop.m 999
+ fclass.m.unc p10,p11 = f8, 0x07
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fmerge.s atan2_sml_norm = atan2_sgnY, atan2_sml_norm
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C // d = C + alpha_cub * C
+ nop.i 999
+;;
+}
+
+// p12 ==> y0 x0
+// p13 ==> y0 x!0
+// p14 ==> y!0 x0
+// p15 ==> y!0 x!0
+{ .mfi
+ nop.m 999
+(p10) fclass.m.unc p12,p13 = f9, 0x07
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fclass.m.unc p14,p15 = f9, 0x07
+ nop.i 999
+;;
+}
+
+
+
+
+{ .mfb
+ nop.m 999
+(p13) fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
+(p12) br.spnt ATAN2_ERROR
+;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_pd = atan2_P0, atan2_d, f0 // pd = P0 * d
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_dsq = atan2_d, atan2_d, f0 // dsq = d^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z // A_hi = z + zcub * Pp
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p14) fma.d f8 = atan2_sgnY, atan2_Pi_by_2, f0
+(p14) br.ret.spnt b0
+;;
+}
+
+
+// y is 0 and x is not: p10 (X positive) returns a signed zero, p11 (X negative) returns signed pi.
+{ .mfb
+ nop.m 999
+(p10) fmerge.s f8 = atan2_sgnY, f0
+(p10) br.ret.spnt b0
+}
+{ .mfb
+ nop.m 999
+(p11) fma.d f8 = atan2_sgnY, atan2_pi, f0
+(p11) br.ret.spnt b0
+;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d // A_lo = d + P0 * d^3
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo // A = A_hi + A_lo
+ nop.i 999
+;;
+}
+
+// Force inexact and possibly underflow if very small results
+{ .mfi
+ nop.m 999
+(p8) fma.d atan2_FR_tmp = atan2_sgnXY, atan2_A, atan2_sml_norm
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.d f8 = atan2_sgnXY, atan2_A, atan2_P // result = P + sgnXY * A
+ br.ret.sptk b0
+;;
+}
+// Both arguments are zero: build the default result in f10 and fall through into __libm_error_region.
+ATAN2_ERROR:
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p10,p11 = atan2_sgnX,f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ mov atan2_GR_tag = 37 // double-precision error tag
+(p10) fmerge.s f10 = atan2_sgnY, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fma.d f10 = atan2_sgnY, atan2_pi, f0
+ nop.i 999
+;;
+}
+.endp atan2#
+ASM_SIZE_DIRECTIVE(atan2#)
+
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+
+.proc __libm_error_region
+__libm_error_region: // Entered by fall-through from ATAN2_ERROR: spill f8, f9, f10 and call __libm_error_support
+.prologue
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfd [GR_Parameter_Y] = f8,16 // STORE Parameter 2 (f8) at sp+32; pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfd [GR_Parameter_X] = f9 // STORE Parameter 1 (f9) at sp+16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f10 // STORE Parameter 3 (f10) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-set GR_Parameter_RESULT, which the call may have overwritten
+};;
+
+// (4)
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S
new file mode 100644
index 0000000..85d25a7
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atan2f.S
@@ -0,0 +1,907 @@
+.file "atan2f.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+// History
+//==============================================================
+// 6/01/00 Initial version
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 8/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 1/05/01 Fixed flag settings for denormal input.
+// 1/19/01 Added documentation
+// 1/30/01 Improved speed
+
+// Description
+//=========================================
+// The atan2 function computes the principal value of the arc tangent of y/x using
+// the signs of both arguments to determine the quadrant of the return value.
+// A domain error may occur if both arguments are zero.
+
+// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
+
+//..
+//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
+//..v and u can be negative. We state the relationship between atan2(y,x) and
+//..atan(v/u).
+//..
+//..Let swap = false if v = y, and swap = true if v = x.
+//..Define C according to the matrix
+//..
+//.. TABLE FOR C
+//.. x +ve x -ve
+//.. no swap (swap = false) sgn(y)*0 sgn(y)*pi
+//.. swap (swap = true ) sgn(y)*pi/2 sgn(y)*pi/2
+//..
+//.. atan2(y,x) = C + atan(v/u) if no swap
+//.. atan2(y,x) = C - atan(v/u) if swap
+//..
+//..This relationship is more efficient to compute as we accommodate signs in v and u
+//..saving the need to obtain the absolute value before computation can proceed.
+//..
+//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
+//..A = y * frcpa(x) (so A = (y/x)(1 - beta))
+//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
+//..a correction.
+//..atan(A) is approximated by a polynomial
+//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
+//..atan(G) is approximated as follows:
+//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + p1 * g^3
+//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
+//..
+//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
+//..Z = x * frcpa(y) (so Z = (x/y)(1 - beta))
+//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
+//..a correction.
+//..atan(Z) is approximated by a polynomial
+//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
+//..atan(T) is approximated as follows:
+//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + p1 * t^3
+//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
+//..
+//..
+//..A = y * frcpa(x)
+//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
+//..
+//..This polynomial is computed as follows:
+//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
+//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
+//..
+//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
+//..poly_A1 = poly_A2 + A4 * poly_A1
+//..poly_A1 = poly_A3 + A4 * poly_A1
+//..
+//..poly_A4 = p1 * A
+//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
+//..poly_A5 = p2 + Asq * poly_A5
+//..poly_A4 = poly_A4 + A5 * poly_A5
+//..
+//..atan_A = poly_A4 + A11 * poly_A1
+//..
+//..atan(G) is approximated as follows:
+//..G_numer = y - A*x, G_denom = x + A*y
+//..H1 = frcpa(G_denom)
+//..H_beta = 1 - H1 * G_denom
+//..H2 = H1 + H1 * H_beta
+//..H_beta2 = H_beta*H_beta
+//..H3 = H2 + H2*H_beta2
+//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
+//..atan_G = G_numer*H3 + atan_G
+//..
+//..
+//..A = y * frcpa(x)
+//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
+//..
+//..This polynomial is computed as follows:
+//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
+//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
+//..
+//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
+//..poly_A1 = poly_A2 + A4 * poly_A1
+//..poly_A1 = poly_A3 + A4 * poly_A1
+//..
+//..poly_A4 = p1 * A
+//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
+//..poly_A5 = p2 + Asq * poly_A5
+//..poly_A4 = poly_A4 + A5 * poly_A5
+//..
+//..atan_A = poly_A4 + A11 * poly_A1
+//..
+//..
+//..====================================================================
+//.. COEFFICIENTS USED IN THE COMPUTATION
+//..====================================================================
+
+//coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
+//
+// coef_p1 = -.3333332707155439167401311806315789E+00
+// coef_p1 in dbl = BFD5 5555 1219 1621
+//
+// coef_p2 = .1999967670926658391827857030875748E+00
+// coef_p2 in dbl = 3FC9 997E 7AFB FF4E
+//
+// coef_p3 = -.1427989384500152360161563301087296E+00
+// coef_p3 in dbl = BFC2 473C 5145 EE38
+//
+// coef_p4 = .1105852823460720770079031213661163E+00
+// coef_p4 in dbl = 3FBC 4F51 2B18 65F5
+//
+// coef_p5 = -.8811839915595312348625710228448363E-01
+// coef_p5 in dbl = BFB6 8EED 6A8C FA32
+//
+// coef_p6 = .6742329836955067042153645159059714E-01
+// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
+//
+// coef_p7 = -.4468571068774672908561591262231909E-01
+// coef_p7 in dbl = BFA6 E10B A401 393F
+//
+// coef_p8 = .2252333246746511135532726960586493E-01
+// coef_p8 in dbl = 3F97 105B 4160 F86B
+//
+// coef_p9 = -.7303884867007574742501716845542314E-02
+// coef_p9 in dbl = BF7D EAAD AA33 6451
+//
+// coef_p10 = .1109686868355312093949039454619058E-02
+// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
+//
+
+// Special values
+//==============================================================
+// Y x Result
+// +number +inf +0
+// -number +inf -0
+// +number -inf +pi
+// -number -inf -pi
+//
+// +inf +number +pi/2
+// -inf +number -pi/2
+// +inf -number +pi/2
+// -inf -number -pi/2
+//
+// +inf +inf +pi/4
+// -inf +inf -pi/4
+// +inf -inf +3pi/4
+// -inf -inf -3pi/4
+//
+// +1 +1 +pi/4
+// -1 +1 -pi/4
+// +1 -1 +3pi/4
+// -1 -1 -3pi/4
+//
+// +number +0 +pi/2 // does not raise DBZ
+// -number +0 -pi/2 // does not raise DBZ
+// +number -0 +pi/2 // does not raise DBZ
+// -number -0 -pi/2 // does not raise DBZ
+//
+// +0 +number +0
+// -0 +number -0
+// +0 -number +pi
+// -0 -number -pi
+//
+// +0 +0 +0 // does not raise invalid
+// -0 +0 -0 // does not raise invalid
+// +0 -0 +pi // does not raise invalid
+// -0 -0 -pi // does not raise invalid
+//
+// Nan anything quiet Y
+// anything NaN quiet X
+
+// atan2(+-0/+-0) sets double error tag to 37
+// atan2f(+-0/+-0) sets single error tag to 38
+// These are domain errors.
+
+#include "libm_support.h"
+
+//
+// Assembly macros
+//=========================================
+
+
+// integer registers (with the alloc at atan2f entry, r33-r37 are locals and r38-r41 are outputs)
+atan2f_GR_Addr_1 = r33 // walks atan2f_coef_table1
+atan2f_GR_Addr_2 = r34 // Addr_1 + 0x30, walks atan2f_coef_table2
+GR_SAVE_B0 = r35 // b0 saved across the __libm_error_support call
+
+GR_SAVE_PFS = r36 // ar.pfs saved across the call
+GR_SAVE_GP = r37 // gp saved across the call
+
+GR_Parameter_X = r38 // address of the stored parameter 1
+GR_Parameter_Y = r39 // address of the stored parameter 2
+GR_Parameter_RESULT = r40 // address of the result slot (parameter 3)
+GR_Parameter_TAG = r41 // error tag, set to 38 in the error stub
+
+// floating point registers (p1..p10 and the pi multiples are loaded from the coefficient tables)
+atan2f_coef_p1 = f32
+atan2f_coef_p10 = f33
+atan2f_coef_p7 = f34
+atan2f_coef_p6 = f35
+
+atan2f_coef_p3 = f36
+atan2f_coef_p2 = f37
+atan2f_coef_p9 = f38
+atan2f_coef_p8 = f39
+atan2f_coef_p5 = f40
+
+atan2f_coef_p4 = f41
+atan2f_const_piby2 = f42
+atan2f_const_pi = f43
+atan2f_const_piby4 = f44
+atan2f_const_3piby4 = f45
+
+atan2f_xsq = f46 // x*x
+atan2f_ysq = f47 // y*y
+atan2f_xy = f48 // x*y
+atan2f_const_1 = f49 // sgn(y)*0 when x >= 0, sgn(y)*1 when x < 0
+atan2f_sgn_Y = f50 // +-1.0 carrying the sign of y
+
+atan2f_Z0 = f51 // frcpa approximation to 1/y
+atan2f_A0 = f52 // frcpa approximation to 1/x
+atan2f_Z = f53 // Z0*x
+atan2f_A = f54 // A0*y
+atan2f_C = f55 // quadrant constant C from the table in the header comment
+
+atan2f_U = f56 // -Z when |y| > |x| (swap), A otherwise
+atan2f_Usq = f57 // U^2
+atan2f_U4 = f58 // U^4
+atan2f_U6 = f59 // U^6
+atan2f_U8 = f60 // U^8
+
+atan2f_poly_u109 = f61 // p9 + Usq*p10
+atan2f_poly_u87 = f62 // p7 + Usq*p8
+atan2f_poly_u65 = f63 // p5 + Usq*p6
+atan2f_poly_u43 = f64 // p3 + Usq*p4
+atan2f_poly_u21 = f65 // p1 + Usq*p2
+
+atan2f_poly_u10to7 = f66 // poly_u87 + U4*poly_u109
+atan2f_poly_u6to3 = f67 // poly_u43 + U4*poly_u65
+atan2f_poly_u10to3 = f68 // poly_u6to3 + U8*poly_u10to7
+atan2f_poly_u10to0 = f69 // poly_u210 + U6*poly_u10to3
+atan2f_poly_u210 = f70 // 1 + Usq*poly_u21
+
+atan2f_T_numer = f71 // x - Z0*x*y
+atan2f_T_denom = f72 // y + Z0*x*x
+atan2f_G_numer = f73 // y - A0*x*y
+atan2f_G_denom = f74 // x + A0*y*y
+atan2f_p1rnum = f75 // p1 * R_numer
+
+atan2f_R_denom = f76 // T_denom (swap) or G_denom (no swap)
+atan2f_R_numer = f77 // -T_numer (swap) or G_numer (no swap)
+atan2f_pR = f78 // p1rnum * Q1
+atan2f_pRC = f79 // C + rsq*pR
+atan2f_pQRC = f80 // pRC + R_numer*Q3
+
+atan2f_Q1 = f81 // frcpa approximation to 1/R_denom
+atan2f_Q_beta = f82 // 1 - Q1*R_denom
+atan2f_Q2 = f83 // Q1 + Q1*Q_beta
+atan2f_Q_beta2 = f84 // Q_beta^2
+atan2f_Q3 = f85 // Q2 + Q2*Q_beta2
+
+atan2f_r = f86 // Q1 * R_numer
+atan2f_rsq = f87 // r^2
+atan2f_poly_atan_U = f88 // defined but not referenced by the atan2f code
+
+
+// predicate registers (documentation only: the code names p6-p9 directly, see the 8/17/00 history entry)
+//atan2f_Pred_Swap = p6 // |y| > |x|
+//atan2f_Pred_noSwap = p7 // |y| <= |x|
+//atan2f_Pred_Xpos = p8 // x >= 0
+//atan2f_Pred_Xneg = p9 // x < 0
+
+
+.data
+// Polynomial coefficients and pi multiples stored as IEEE double bit patterns; the entry order follows the ldfpd pair loads in atan2f.
+.align 16
+// atan2f_coef_table2 must directly follow the six entries of table1: atan2f addresses it as table1 + 0x30.
+atan2f_coef_table1:
+ASM_TYPE_DIRECTIVE(atan2f_coef_table1,@object)
+data8 0xBFD5555512191621 // p1
+data8 0x3F522E5D33BC9BAA // p10
+data8 0xBFA6E10BA401393F // p7
+data8 0x3FB142A73D7C54E3 // p6
+data8 0xBFC2473C5145EE38 // p3
+data8 0x3FC9997E7AFBFF4E // p2
+ASM_SIZE_DIRECTIVE(atan2f_coef_table1)
+
+atan2f_coef_table2:
+ASM_TYPE_DIRECTIVE(atan2f_coef_table2,@object)
+data8 0xBF7DEAADAA336451 // p9
+data8 0x3F97105B4160F86B // p8
+data8 0xBFB68EED6A8CFA32 // p5
+data8 0x3FBC4F512B1865F5 // p4
+data8 0x3ff921fb54442d18 // pi/2
+data8 0x400921fb54442d18 // pi
+data8 0x3fe921fb54442d18 // pi/4
+data8 0x4002d97c7f3321d2 // 3pi/4
+ASM_SIZE_DIRECTIVE(atan2f_coef_table2)
+
+
+
+.global atan2f
+#ifdef _LIBC
+.global __atan2f
+.global __ieee754_atan2f
+#endif
+// atan2f(y, x), single precision: y arrives in f8, x in f9, and the result is returned in f8.
+.text
+.align 32
+// Under _LIBC the same entry point is also labelled __atan2f and __ieee754_atan2f.
+atan2f:
+.proc atan2f
+#ifdef _LIBC
+.proc __atan2f
+__atan2f:
+.proc __ieee754_atan2f
+__ieee754_atan2f:
+#endif
+// Fast path: both reciprocal approximations and the x*x, y*y, x*y products are started before it is
+// known whether |y| > |x|; p6/p7 later pick the swapped or unswapped quantities. Inputs that are
+// NaN, infinite or zero leave through ATAN2F_XY_INF_NAN_ZERO.
+{ .mfi
+ alloc r32 = ar.pfs,1,5,4,0 // 1 input, 5 locals, 4 outputs: r33-r37 are locals, r38-r41 outputs
+ frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y
+ nop.i 999
+}
+{ .mfi
+ addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp // Linkage-table entry for atan2f_coef_table1
+ fma.s1 atan2f_xsq = f9,f9,f0 // x*x
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1] // Address of atan2f_coef_table1
+ frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_ysq = f8,f8,f0 // y*y
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_xy = f9,f8,f0 // x*y
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 // Skip the six 8-byte entries of table1 to reach atan2f_coef_table2
+ fmerge.s atan2f_sgn_Y = f8,f1 // +-1.0 carrying the sign of y
+ nop.i 999 ;;
+}
+// Load the coefficients pairwise from both tables while the divide setup proceeds
+{ .mmf
+ ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
+ ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
+ fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero
+}
+;;
+
+{ .mfi
+ ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
+ fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8 // y + Z0*x*x
+ nop.i 999
+}
+{ .mfi
+ ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
+ fma.s1 atan2f_Z = atan2f_Z0,f9,f0 // Z = x * (approx 1/y)
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
+ fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9 // x + A0*y*y
+ nop.i 999
+}
+{ .mfi
+ ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
+ fma.s1 atan2f_A = atan2f_A0,f8,f0 // A = y * (approx 1/x)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
+ fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9 // x - Z0*x*y
+(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero
+}
+
+
+// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
+{ .mfi
+ nop.m 999
+ fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8 // y - A0*x*y
+(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero
+}
+
+// const_1 scales pi in the no-swap constant C: sgn(y)*0 for x >= 0, sgn(y)*1 for x < 0
+{ .mfi
+ nop.m 999
+(p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0
+ nop.i 999 ;;
+}
+
+// Select the polynomial argument U: -Z when swapped (p6), A otherwise (p7)
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_U = atan2f_A,f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0
+ nop.i 999 ;;
+}
+
+// Select the correction-term denominator R_denom and start its reciprocal Q1
+{ .mfi
+ nop.m 999
+(p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0
+ nop.i 999 ;;
+}
+
+// Select the correction-term numerator R_numer (negated in the swap case) and p1*R_numer
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0
+ nop.i 999 ;;
+}
+
+// Evaluate 1 + p1 U^2 + ... + p10 U^20 from pairwise pieces, interleaved with the refinement of Q1
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1 // 1 - Q1*R_denom
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0 // Limited-precision quotient r
+ nop.i 999 ;;
+}
+// Quadrant constant C: sgn(y)*pi/2 when swapped, const_1*pi otherwise
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0 // p1 * r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1 // Q1 + Q1*Q_beta
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1 // 1 + p1 U^2 + p2 U^4
+ nop.i 999 ;;
+}
+// The dummy compare below overwrites p8; p8 is not read again on this path
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2 // Q2 + Q2*Q_beta^2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C // C + p1*r^3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC // C + p1*r^3 + R_numer*Q3
+ nop.i 999 ;;
+}
+// Final combination U*poly + (C + correction), rounded to single precision
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC
+ br.ret.sptk b0 ;;
+}
+
+
+
+ATAN2F_XY_INF_NAN_ZERO:
+// Special-case path, entered when x or y is NaN, infinite or zero. Cases are peeled off in order: NaN, x +inf, y inf, x -inf, then zeros.
+{ .mfi
+ nop.m 999
+ fclass.m p10,p0 = f8,0xc3 // Is y nan
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p12,p0 = f9,0xc3 // Is x nan
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p6,p0 = f9,0x21 // Is x +inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p10) fma.s f8 = f9,f8,f0 // Result quietized y if y is nan
+(p10) br.ret.spnt b0 // Exit if y is nan
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p7,p8 = f8,0x23 // x +inf, is y inf: p7 if yes, p8 if no
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p12) fnorm.s f8 = f9 // Result quietized x if x is nan, y not nan
+(p12) br.ret.spnt b0 // Exit if x is nan, y not nan
+}
+;;
+
+// Here if x or y inf, or x or y zero
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p15,p0 = f8,f9 // Dummy op to set flag on denormal inputs; p15 is rewritten by the x -inf test below before it is read
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p11,p12 = f9,0x22 // Is x -inf: p11 if yes, p12 if no
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p7) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
+(p7) br.ret.spnt b0 // Exit if x +inf and y inf
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p8) fmerge.s f8 = f8,f0 // If x +inf and y not inf, result +-0
+(p8) br.ret.spnt b0 // Exit if x +inf and y not inf
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fclass.m.unc p13,p0 = f8,0x23 // x not -inf, is y inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p11) fclass.m.unc p14,p15 = f8,0x23 // x -inf, is y inf: p14 if yes, p15 if no
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p6,p7 = f9,0x7 // Is x zero: p6 if yes, p7 if no
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p13) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
+(p13) br.ret.spnt b0 // Exit if x not -inf and y inf
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fma.s f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
+(p11) br.ret.spnt b0 // Exit if x -inf
+}
+;;
+
+// Here if x or y zero
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p8,p9 = f9,0x19 // x not zero, y zero, is x > zero: p8 if yes, p9 if no
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p10,p11 = f8,0x7 // x zero, is y zero: p10 if yes, p11 if no
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fmerge.s f8 = f8, f0 // x > zero and y zero, result is +-zero
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.s f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
+(p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fma.s f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero, result +-pi/2
+ br.ret.sptk b0 // Final special case exit
+}
+;;
+
+
+.endp atan2f
+ASM_SIZE_DIRECTIVE(atan2f)
+
+
+.proc __libm_error_region // Error stub for atan2f(+-0, +-0): builds the default result, calls __libm_error_support with tag 38, returns the stored result in f8
+__libm_error_region:
+.prologue
+ mov GR_Parameter_TAG = 38 // Single-precision atan2 error tag (see the header comment)
+ fclass.m p10,p11 = f9,0x5 // @zero | @pos: p10 if x is +0, p11 otherwise
+;;
+(p10) fmerge.s f10 = f8, f0 // x is +0: default result is zero with the sign of y
+(p11) fma.s f10 = atan2f_sgn_Y, atan2f_const_pi,f0 // otherwise: default result is +-pi with the sign of y
+;;
+// Note the parameter 2 slot relative to the entry sp, then open a 64-byte frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp - 32, i.e. sp + 32 once the frame exists
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+}
+;;
+
+{ .mmi
+ stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 on stack; the post-increment leaves GR_Parameter_Y at sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+}
+;;
+
+
+.body
+{ .mib
+ stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point GR_Parameter_Y back at the parameter 2 slot (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+}
+;;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address after the call (see the 8/15/00 history entry)
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+}
+;;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+}
+;;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atan2l.c b/sysdeps/ia64/fpu/e_atan2l.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atan2l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S
new file mode 100644
index 0000000..1ac0e1c
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_cosh.S
@@ -0,0 +1,1142 @@
+.file "cosh.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// API
+//==============================================================
+// double = cosh(double)
+// input floating point f8
+// output floating point f8
+
+
+// Overview of operation
+//==============================================================
+// There are four paths
+
+// 1. |x| < 0.25 COSH_BY_POLY
+// 2. |x| < 32 COSH_BY_TBL
+// 3. |x| < 2^14 COSH_BY_EXP
+// 4. |x| >= 2^14 COSH_HUGE
+
+// For paths 1, and 2 SAFE is always 1.
+// For path 4, Safe is always 0.
+// SAFE = 1 means we cannot overflow.
+
+#include "libm_support.h"
+
+// Assembly macros: symbolic names for the registers used by cosh
+//==============================================================
+cosh_FR_X = f44 // |x| (fmerge.s of f0 and f8)
+cosh_FR_SGNX = f40 // +-1.0 carrying the sign of x
+// Many names below alias one physical register (f9-f15, f32-f35, f38 and f39 each carry several), so two aliases of a register cannot be live together.
+cosh_FR_Inv_log2by64 = f9 // argument-reduction constants, loaded from double_cosh_arg_reduction
+cosh_FR_log2by64_lo = f11
+cosh_FR_log2by64_hi = f10
+
+cosh_FR_A1 = f9
+cosh_FR_A2 = f10
+cosh_FR_A3 = f11
+
+cosh_FR_Rcub = f12
+cosh_FR_M_temp = f13
+cosh_FR_R_temp = f13
+cosh_FR_Rsq = f13
+cosh_FR_R = f14
+
+cosh_FR_M = f38
+
+cosh_FR_B1 = f15
+cosh_FR_B2 = f32
+cosh_FR_B3 = f33
+
+cosh_FR_peven_temp1 = f34
+cosh_FR_peven_temp2 = f35
+cosh_FR_peven = f36
+
+cosh_FR_podd_temp1 = f34
+cosh_FR_podd_temp2 = f35
+cosh_FR_podd = f37
+
+cosh_FR_J_temp = f9
+cosh_FR_J = f10
+
+cosh_FR_Mmj = f39
+
+cosh_FR_N_temp1 = f11
+cosh_FR_N_temp2 = f12
+cosh_FR_N = f13
+
+cosh_FR_spos = f14 // 2^(N-1) per the STEP 1 comments in cosh
+cosh_FR_sneg = f15 // 2^(-N-1) per the STEP 1 comments in cosh
+
+cosh_FR_Tjhi = f32
+cosh_FR_Tjlo = f33
+cosh_FR_Tmjhi = f34
+cosh_FR_Tmjlo = f35
+
+GR_mJ = r35
+GR_J = r36
+
+AD_mJ = r38
+AD_J = r39
+
+cosh_FR_C_hi = f9
+cosh_FR_C_hi_temp = f10
+cosh_FR_C_lo_temp1 = f11
+cosh_FR_C_lo_temp2 = f12
+cosh_FR_C_lo_temp3 = f13
+
+cosh_FR_C_lo = f38
+cosh_FR_S_hi = f39
+
+cosh_FR_S_hi_temp1 = f10
+cosh_FR_Y_hi = f11
+cosh_FR_Y_lo_temp = f12
+cosh_FR_Y_lo = f13
+cosh_FR_COSH = f9
+
+cosh_FR_X2 = f9 // |x|^2 on the polynomial path
+cosh_FR_X4 = f10 // |x|^4 on the polynomial path
+
+cosh_FR_P1 = f14 // P1..P6: polynomial coefficients loaded from double_cosh_p_table
+cosh_FR_P2 = f15
+cosh_FR_P3 = f32
+cosh_FR_P4 = f33
+cosh_FR_P5 = f34
+cosh_FR_P6 = f35
+
+cosh_FR_TINY_THRESH = f9
+
+cosh_FR_COSH_temp = f10
+cosh_FR_SCALE = f11
+
+cosh_FR_hi_lo = f10
+
+cosh_FR_poly_podd_temp1 = f11
+cosh_FR_poly_podd_temp2 = f13
+cosh_FR_poly_peven_temp1 = f11
+cosh_FR_poly_peven_temp2 = f13
+// With the alloc at cosh entry (0 inputs, 12 locals, 4 outputs) r41-r43 are locals and r44-r46 are outputs; presumably used around the error-support call, which is outside this view.
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+
+GR_Parameter_X = r44
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+// Each entry is 16 bytes: a 64-bit significand followed by the sign/exponent word, read with ldfe in 16-byte steps.
+.align 16
+double_cosh_arg_reduction:
+ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
+ data8 0xB8AA3B295C17F0BC, 0x00004005 // Inv_log2by64; decodes to about 64/ln(2)
+ data8 0xB17217F7D1000000, 0x00003FF8 // log2by64_hi; decodes to about ln(2)/64, high part
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // presumably log2by64_lo (low part of ln(2)/64); its load is outside this view
+ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
+
+double_cosh_p_table: // P1..P6 for the |x| < 0.25 path: cosh(x) ~ 1 + P1 x^2 + P2 x^4 + ... + P6 x^12, 16 bytes per entry
+ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
+ data8 0x8000000000000000, 0x00003FFE // P1 = 1/2
+ data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // P2 ~ 1/24
+ data8 0xB60B60B60B4FE884, 0x00003FF5 // P3 ~ 1/720
+ data8 0xD00D00D1021D7370, 0x00003FEF // P4 ~ 1/40320
+ data8 0x93F27740C0C2F1CC, 0x00003FE9 // P5 ~ 1/10!
+ data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // P6 ~ 1/12!
+ASM_SIZE_DIRECTIVE(double_cosh_p_table)
+
+double_cosh_ab_table: // Six 16-byte entries; presumably A1..A3 then B1..B3 (the loads are outside this view; confirm)
+ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // ~ 1/6
+ data8 0x88888888884ECDD5, 0x00003FF8 // ~ 1/120
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // ~ 1/5040
+ data8 0x8000000000000002, 0x00003FFE // ~ 1/2
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // ~ 1/24
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // ~ 1/720
+ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
+
+double_cosh_j_table: // 65 rows of 32 bytes: significand, sign/exponent word, a 32-bit word, zero padding. The first two words decode to about 2^(j/64) for j = -32..32; the third looks like a single-precision low-order part (confirm against the loads, which are outside this view)
+ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
+ data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // j = -32: about 2^(-1/2)
+ data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+ data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+ data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+ data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+ data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+ data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+ data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+ data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+ data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+ data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+ data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+ data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+ data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+ data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+ data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+ data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+ data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+ data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+ data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+ data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+ data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // j = 0: exactly 1.0
+ data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+ data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+ data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+ data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+ data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+ data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+ data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+ data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+ data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+ data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+ data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+ data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+ data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+ data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+ data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+ data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+ data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+ data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+ data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+ data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+ data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+ data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+ data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+ data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+ data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+ data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+ data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // j = +32: about 2^(1/2)
+ASM_SIZE_DIRECTIVE(double_cosh_j_table)
+
+.align 32
+.global cosh#
+
+.section .text
+.proc cosh#
+.align 32
+
+cosh:
+
+#ifdef _LIBC
+.global __ieee754_cosh#
+.proc __ieee754_cosh#
+__ieee754_cosh:
+#endif
+
+// X NAN?
+
+{ .mfi
+ alloc r32 = ar.pfs,0,12,4,0
+(p0) fclass.m.unc p6,p7 = f8, 0xc3 //@snan | @qnan
+ nop.i 999
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p6) fma.d.s0 f8 = f8,f1,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+
+// X infinity
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6,p0 = f8, 0x23 //@inf
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fmerge.s f8 = f0,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+
+
+// Put 0.25 in f9; p6 true if x < 0.25
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000000fffd ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s cosh_FR_X = f0,f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s cosh_FR_SGNX = f8,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc p0,p7 = cosh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.sptk L(COSH_BY_TBL)
+}
+;;
+
+
+// COSH_BY_POLY:
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Get the values of P_x from the table
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_cosh_p_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+ nop.m 999
+(p0) ldfe cosh_FR_P1 = [r34],16
+(p0) fma.s1 cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_P2 = [r34],16 ;;
+(p0) ldfe cosh_FR_P3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_P4 = [r34],16 ;;
+(p0) ldfe cosh_FR_P5 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe cosh_FR_P6 = [r34],16
+(p0) fma.s1 cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0
+ nop.i 999 ;;
+}
+
+// Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1
+ nop.i 999
+}
+
+// Calculate cosh_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0
+ nop.i 999 ;;
+}
+
+// Y_lo = x2*p_odd + p_even
+// Calculate f8 = Y_hi + Y_lo
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_X2, cosh_FR_podd, cosh_FR_peven
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.d.s0 f8 = f1, f1, cosh_FR_Y_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(COSH_BY_TBL):
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. The HUGE threshold depends on the precision:
+// Double (the constant loaded below)
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
+// A 2^14 threshold is also listed:
+// 1000d (register-biased) is e = 14 (true)
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010009 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc p6,p7 = cosh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(COSH_HUGE) ;;
+}
+
+// r32 = 1
+// r34 = N-1
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// cosh(x) = cosh(B+R)
+// = cosh(B) cosh(R) + sinh(B) sinh(R)
+// cosh(R) can be approximated by 1 + p_even
+// sinh(R) can be approximated by p_odd
+
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9 = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0) adds r32 = 0x1,r0
+(p0) addl r34 = @ltoff(double_cosh_arg_reduction), gp
+ nop.i 999
+}
+;;
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1) = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+ ld8 r34 = [r34]
+(p0) movl r38 = 0x000000000000fffe ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_Inv_log2by64 = [r34],16 ;;
+(p0) ldfe cosh_FR_log2by64_hi = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mbb
+(p0) ldfe cosh_FR_log2by64_lo = [r34],16
+ nop.b 999
+ nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9 = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_cosh_ab_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// cosh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+// and the floating point representation of M in cosh_FR_M
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_M = cosh_FR_X, cosh_FR_Inv_log2by64, f0
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe cosh_FR_A1 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.fx.s1 cosh_FR_M_temp = cosh_FR_M
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnorm.s1 cosh_FR_M = cosh_FR_M_temp
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r35 = cosh_FR_M_temp
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) and r36 = 0x3f, r35 ;;
+}
+
+// Calculate R
+// f13 = f44 - f12*f10 = x - M*log2by64_hi
+// f14 = f13 - f8*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe cosh_FR_A2 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 cosh_FR_R = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp
+ nop.i 999
+}
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0) ldfe cosh_FR_A3 = [r34],16 ;;
+(p0) ldfe cosh_FR_B1 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_B2 = [r34],16 ;;
+(p0) ldfe cosh_FR_B3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl r34 = r36, 0x2 ;;
+(p0) sxt1 r37 = r34 ;;
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Rsq = cosh_FR_R, cosh_FR_R, f0
+(p0) shr r36 = r37, 0x2 ;;
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0) sub r34 = r35, r36
+ nop.i 999 ;;
+(p0) shr r35 = r34, 0x6 ;;
+}
+
+{ .mii
+(p0) sub r40 = r38, r35
+(p0) adds r37 = 0x1, r35
+(p0) add r39 = r38, r35 ;;
+}
+
+// Get the address of the J table, add the offset,
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0) sub r34 = r35, r32
+(p0) addl r37 = @ltoff(double_cosh_j_table), gp
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ld8 r37 = [r37]
+(p0) fma.s1 cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0
+ nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p6 true if x < 32
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010004 ;;
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3, cosh_FR_B2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1
+ nop.i 999
+}
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd_temp1 = cosh_FR_Rsq, cosh_FR_A3, cosh_FR_A2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp cosh_FR_N_temp1 = r39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven = cosh_FR_Rsq, cosh_FR_peven_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd_temp2 = cosh_FR_Rsq, cosh_FR_podd_temp1, cosh_FR_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd = cosh_FR_podd_temp2, cosh_FR_Rcub, cosh_FR_R
+ nop.i 999
+}
+
+// sinh_GR_mj contains the table offset for -j
+// sinh_GR_j contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0) setf.exp cosh_FR_N_temp2 = r40
+(p0) movl r40 = 0x0000000000000020 ;;
+}
+
+{ .mfi
+(p0) sub GR_mJ = r40, r36
+(p0) fmerge.se cosh_FR_spos = cosh_FR_N_temp1, f1
+(p0) adds GR_J = 0x20, r36 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl GR_mJ = GR_mJ, 5 ;;
+(p0) add AD_mJ = r37, GR_mJ ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) ldfe cosh_FR_Tmjhi = [AD_mJ],16
+(p0) shl GR_J = GR_J, 5 ;;
+}
+
+{ .mfi
+(p0) ldfs cosh_FR_Tmjlo = [AD_mJ],16
+(p0) fcmp.lt.unc.s1 p6,p7 = cosh_FR_X,f9
+(p0) add AD_J = r37, GR_J ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_Tjhi = [AD_J],16 ;;
+(p0) ldfs cosh_FR_Tjlo = [AD_J],16
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.se cosh_FR_sneg = cosh_FR_N_temp2, f1
+(p7) br.cond.spnt L(COSH_BY_EXP) ;;
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate C_hi
+// ******************************************************
+// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi_temp
+ nop.i 999
+}
+
+// ******************************************************
+// Calculate S_hi
+// ******************************************************
+// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi_temp1
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_S_hi_temp1 = cosh_FR_sneg, cosh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// Calculate C_lo
+// ******************************************************
+// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
+// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjlo + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
+// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
+// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
+// cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 cosh_FR_S_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo_temp3 = cosh_FR_spos, cosh_FR_Tjlo, cosh_FR_C_lo_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo = cosh_FR_C_lo_temp3, f1, cosh_FR_C_lo_temp2
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
+// cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
+// cosh_FR_COSH = Y_hi + Y_lo
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.d.s0 f8 = cosh_FR_C_hi, f1, cosh_FR_Y_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+L(COSH_BY_EXP):
+
+// When p7 is true, we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// f44 = Scale * (Y_hi + Y_lo)
+// = cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_peven, f1, cosh_FR_podd
+ nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow if N-1 > 0x3fe = 1022
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x00000000000003fe ;;
+}
+
+{ .mfi
+(p0) cmp.gt.unc p0,p7 = r34, r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_Tjhi, cosh_FR_Y_lo_temp, cosh_FR_Tjlo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_COSH_temp = cosh_FR_Y_lo, f1, cosh_FR_Tjhi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s0 f44 = cosh_FR_spos, cosh_FR_COSH_temp, f0
+ nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = f44,f44
+(p7) br.ret.sptk b0 ;;
+}
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s2 f43 = cosh_FR_spos, cosh_FR_COSH_temp, f0
+ nop.i 999 ;;
+}
+
+// 103FF => 103FF -FFFF = 400(true)
+// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
+// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
+// largest double in register bias
+// Now set p8 if the answer with WRE is greater than or equal this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x00000000000103ff ;;
+}
+
+{ .mmf
+ nop.m 999
+(p0) setf.exp f41 = r32
+(p0) fsetc.s2 0x7F,0x40 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f42 = f41, f41
+ nop.i 999 ;;
+}
+
+// The error tag for overflow is 64
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p8) mov r47 = 64 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
+(p8) br.cond.spnt __libm_error_region ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov r47 = 64
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt __libm_error_region ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f44,f44
+(p0) br.ret.sptk b0 ;;
+}
+
+
+// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+L(COSH_HUGE):
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000015dbf ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_hi_lo = f1, f9, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s0 f44 = f9, cosh_FR_hi_lo, f0
+(p0) mov r47 = 64
+}
+;;
+
+.endp cosh#
+ASM_SIZE_DIRECTIVE(cosh#)
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+.proc __libm_error_region // Error exit: hands x, 0.0 and the computed result to __libm_error_support through the stack, then returns the value left in the result slot
+__libm_error_region: // Expects f8 = input x, f44 = computed result, r47 = error tag (set to 64 by the code that branches or falls through to here)
+.prologue
+// (1) GR_Parameter_Y = old sp - 32; save ar.pfs and gp; open a 64-byte frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value (reads sp before the decrement below, so this is new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Write 0.0 as parameter 2 at sp+32, form the parameter 1 address, save b0
+{ .mmi
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack (post-increment leaves GR_Parameter_Y = sp + 48)
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store x at sp+16 and the result at sp+48, rewind GR_Parameter_Y to sp+32, call the handler
+{ .mib
+ stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the parameter 2 slot; the store above still uses the old address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // recompute the result address after the call (NOTE(review): presumably because the callee may overwrite this register - confirm)
+};;
+
+// (4) Reload the result into f8, drop the frame, restore b0/gp/ar.pfs and return
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S
new file mode 100644
index 0000000..84130ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_coshf.S
@@ -0,0 +1,1133 @@
+.file "coshf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/16/00 The error tag for coshf overflow changed to 65 (from 64).
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// API
+//==============================================================
+// float = coshf(float)
+// input floating point f8
+// output floating point f8
+
+
+// Overview of operation
+//==============================================================
+// There are four paths
+
+// 1. |x| < 0.25 COSH_BY_POLY
+// 2. |x| < 32 COSH_BY_TBL
+// 3. |x| < 2^7 COSH_BY_EXP
+// 4. |x| >= 2^7 COSH_HUGE
+
+// For paths 1, and 2 SAFE is always 1.
+// For path 4, Safe is always 0.
+// SAFE = 1 means we cannot overflow.
+
+#include "libm_support.h"
+
+// Assembly macros: symbolic names for the registers coshf uses. Many names share one physical register, so two aliases of the same register must never be live at once.
+//==============================================================
+coshf_FR_X = f44 // |x|: f8 merged with the sign of f0; same register as FR_RESULT
+coshf_FR_SGNX = f40 // 1.0 carrying the sign of x (fmerge.s f8,f1)
+
+coshf_FR_Inv_log2by64 = f9 // argument-reduction constants, loaded from single_coshf_arg_reduction
+coshf_FR_log2by64_lo = f11
+coshf_FR_log2by64_hi = f10
+
+coshf_FR_A1 = f9 // A1..A3 share f9-f11 with the argument-reduction constants above
+coshf_FR_A2 = f10
+coshf_FR_A3 = f11
+
+coshf_FR_Rcub = f12
+coshf_FR_M_temp = f13 // M_temp, R_temp and Rsq all live in f13 at different times
+coshf_FR_R_temp = f13
+coshf_FR_Rsq = f13
+coshf_FR_R = f14
+
+coshf_FR_M = f38
+
+coshf_FR_B1 = f15
+coshf_FR_B2 = f32
+coshf_FR_B3 = f33
+
+coshf_FR_peven_temp1 = f34
+coshf_FR_peven_temp2 = f35
+coshf_FR_peven = f36
+
+coshf_FR_podd_temp1 = f34 // same registers as the peven temporaries
+coshf_FR_podd_temp2 = f35
+coshf_FR_podd = f37
+
+coshf_FR_J_temp = f9
+coshf_FR_J = f10
+
+coshf_FR_Mmj = f39
+
+coshf_FR_N_temp1 = f11
+coshf_FR_N_temp2 = f12
+coshf_FR_N = f13
+
+coshf_FR_spos = f14 // shares f14 with coshf_FR_R
+coshf_FR_sneg = f15 // shares f15 with coshf_FR_B1
+
+coshf_FR_Tjhi = f32 // table values T(j) / T(-j); share f32-f35 with B2, B3 and the polynomial temporaries
+coshf_FR_Tjlo = f33
+coshf_FR_Tmjhi = f34
+coshf_FR_Tmjlo = f35
+
+GR_mJ = r35 // byte offsets into single_coshf_j_table for -j and +j
+GR_J = r36
+
+AD_mJ = r38 // addresses of the -j and +j table entries
+AD_J = r39
+
+// coshf allocates 12 locals (r32-r43) and 4 outputs (r44-r47): saves use locals, parameters use outputs
+GR_SAVE_B0 = r42
+GR_SAVE_PFS = r41
+GR_SAVE_GP = r43
+
+GR_Parameter_X = r44
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+GR_Parameter_TAG = r47
+
+FR_X = f8
+FR_Y = f0
+FR_RESULT = f44
+
+
+coshf_FR_C_hi = f9
+coshf_FR_C_hi_temp = f10
+coshf_FR_C_lo_temp1 = f11
+coshf_FR_C_lo_temp2 = f12
+coshf_FR_C_lo_temp3 = f13
+
+coshf_FR_C_lo = f38
+coshf_FR_S_hi = f39
+
+coshf_FR_S_hi_temp1 = f10
+coshf_FR_Y_hi = f11
+coshf_FR_Y_lo_temp = f12
+coshf_FR_Y_lo = f13
+coshf_FR_COSH = f9
+
+coshf_FR_X2 = f9 // |x|^2 and |x|^4 for the small-argument polynomial
+coshf_FR_X4 = f10
+
+coshf_FR_P1 = f14 // P1..P6 are loaded from single_coshf_p_table
+coshf_FR_P2 = f15
+coshf_FR_P3 = f32
+coshf_FR_P4 = f33
+coshf_FR_P5 = f34
+coshf_FR_P6 = f35
+
+coshf_FR_TINY_THRESH = f9
+
+coshf_FR_COSH_temp = f10
+coshf_FR_SCALE = f11
+
+coshf_FR_hi_lo = f10
+
+coshf_FR_poly_podd_temp1 = f11
+coshf_FR_poly_podd_temp2 = f13
+coshf_FR_poly_peven_temp1 = f11
+coshf_FR_poly_peven_temp2 = f13
+
+// Data tables
+//==============================================================
+// The three coefficient tables hold 16-byte entries read with ldfe: a 64-bit significand word, then a sign/exponent word.
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+single_coshf_arg_reduction: // constants for reducing |x| modulo ln(2)/64, loaded in this order
+ASM_TYPE_DIRECTIVE(single_coshf_arg_reduction,@object)
+ data8 0xB8AA3B295C17F0BC, 0x00004005 // Inv_log2by64: about 64/ln(2)
+ data8 0xB17217F7D1000000, 0x00003FF8 // log2by64_hi: leading bits of ln(2)/64
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // log2by64_lo: the bits of ln(2)/64 that follow log2by64_hi
+ASM_SIZE_DIRECTIVE(single_coshf_arg_reduction)
+
+single_coshf_p_table: // P1..P6 for the |x| < 0.25 path: cosh(x) ~ 1 + P1*x^2 + P2*x^4 + ... + P6*x^12
+ASM_TYPE_DIRECTIVE(single_coshf_p_table,@object)
+ data8 0x8000000000000000, 0x00003FFE // P1 = 1/2
+ data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // P2, about 1/24
+ data8 0xB60B60B60B4FE884, 0x00003FF5 // P3, about 1/720
+ data8 0xD00D00D1021D7370, 0x00003FEF // P4, about 1/40320
+ data8 0x93F27740C0C2F1CC, 0x00003FE9 // P5, about 1/3628800
+ data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // P6, about 1/479001600
+ASM_SIZE_DIRECTIVE(single_coshf_p_table)
+
+single_coshf_ab_table: // A1..A3 then B1..B3, used by the TBL/EXP paths for p_odd (sinh-like) and p_even (cosh-like) in R
+ASM_TYPE_DIRECTIVE(single_coshf_ab_table,@object)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1, about 1/6
+ data8 0x88888888884ECDD5, 0x00003FF8 // A2, about 1/120
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3, about 1/5040
+ data8 0x8000000000000002, 0x00003FFE // B1, about 1/2
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2, about 1/24
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3, about 1/720
+ASM_SIZE_DIRECTIVE(single_coshf_ab_table)
+
+single_coshf_j_table: // 65 entries of 32 bytes for j = -32..32, indexed by (32 + j) << 5: T(j)_hi read with ldfe at offset 0, T(j)_lo read with ldfs at offset 16
+ASM_TYPE_DIRECTIVE(single_coshf_j_table,@object)
+ data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // j = -32: hi part is about 2^(-32/64)
+ data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+ data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+ data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+ data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+ data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+ data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+ data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+ data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+ data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+ data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+ data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+ data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+ data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+ data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+ data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+ data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+ data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+ data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+ data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+ data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+ data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // j = 0: exactly 1.0, lo part 0
+ data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+ data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+ data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+ data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+ data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+ data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+ data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+ data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+ data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+ data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+ data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+ data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+ data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+ data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+ data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+ data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+ data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+ data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+ data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+ data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+ data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+ data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+ data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+ data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+ data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+ data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+ data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // j = +32: hi part is about 2^(32/64)
+ASM_SIZE_DIRECTIVE(single_coshf_j_table)
+
+.align 32
+.global coshf#
+
+.section .text
+.proc coshf#
+.align 32
+
+coshf:
+
+#ifdef _LIBC
+.global __ieee754_coshf#
+.proc __ieee754_coshf#
+__ieee754_coshf:
+#endif
+
+// X NAN?
+
+
+{ .mfi
+ alloc r32 = ar.pfs,0,12,4,0
+(p0) fclass.m.unc p6,p7 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s.s0 f8 = f8,f1,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// X infinity
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6,p0 = f8, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fmerge.s f8 = f0,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+// Put 0.25 in f9; p7 true if |x| >= 0.25 (then branch to COSH_BY_TBL)
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000000fffd ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s coshf_FR_X = f0,f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s coshf_FR_SGNX = f8,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc p0,p7 = coshf_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.sptk L(COSH_BY_TBL) ;;
+}
+
+
+// COSH_BY_POLY:
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Get the values of P_x from the table
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(single_coshf_p_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+// Calculate coshf_FR_X2 = ax*ax and coshf_FR_X4 = ax*ax*ax*ax
+{ .mmf
+ nop.m 999
+(p0) ldfe coshf_FR_P1 = [r34],16
+(p0) fma.s1 coshf_FR_X2 = coshf_FR_X, coshf_FR_X, f0 ;;
+}
+
+{ .mmi
+(p0) ldfe coshf_FR_P2 = [r34],16 ;;
+(p0) ldfe coshf_FR_P3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe coshf_FR_P4 = [r34],16 ;;
+(p0) ldfe coshf_FR_P5 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe coshf_FR_P6 = [r34],16
+(p0) fma.s1 coshf_FR_X4 = coshf_FR_X2, coshf_FR_X2, f0
+ nop.i 999 ;;
+}
+
+// Calculate coshf_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_poly_podd_temp1 = coshf_FR_X4, coshf_FR_P5, coshf_FR_P3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_podd = coshf_FR_X4, coshf_FR_poly_podd_temp1, coshf_FR_P1
+ nop.i 999
+}
+
+// Calculate coshf_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_poly_peven_temp1 = coshf_FR_X4, coshf_FR_P6, coshf_FR_P4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_poly_peven_temp2 = coshf_FR_X4, coshf_FR_poly_peven_temp1, coshf_FR_P2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_peven = coshf_FR_X4, coshf_FR_poly_peven_temp2, f0
+ nop.i 999 ;;
+}
+
+// Y_lo = x2*p_odd + p_even
+// Calculate f8 = Y_hi + Y_lo
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_Y_lo = coshf_FR_X2, coshf_FR_podd, coshf_FR_peven
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s.s0 f8 = f1, f1, coshf_FR_Y_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(COSH_BY_TBL):
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. The HUGE threshold depends on the precision:
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single (the constant loaded below)
+// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
+// A 2^14 threshold is also listed:
+// 1000d (register-biased) is e = 14 (true)
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010006 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc p6,p7 = coshf_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(COSH_HUGE) ;;
+}
+
+// r32 = 1
+// r34 = N-1
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// coshf(x) = coshf(B+R)
+// = coshf(B) coshf(R) + sinh(B) sinh(R)
+// coshf(R) can be approximated by 1 + p_even
+// sinh(R) can be approximated by p_odd
+
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9 = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0) adds r32 = 0x1,r0
+(p0) addl r34 = @ltoff(single_coshf_arg_reduction), gp
+ nop.i 999
+}
+;;
+
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// coshf_FR_spos = 2^(N-1) and coshf_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1) = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+ ld8 r34 = [r34]
+(p0) movl r38 = 0x000000000000fffe ;;
+}
+
+{ .mmi
+(p0) ldfe coshf_FR_Inv_log2by64 = [r34],16 ;;
+(p0) ldfe coshf_FR_log2by64_hi = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mbb
+(p0) ldfe coshf_FR_log2by64_lo = [r34],16
+ nop.b 999
+ nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9 = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(single_coshf_ab_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// coshf_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+// and the floating point representation of M in coshf_FR_M
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_M = coshf_FR_X, coshf_FR_Inv_log2by64, f0
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe coshf_FR_A1 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.fx.s1 coshf_FR_M_temp = coshf_FR_M
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnorm.s1 coshf_FR_M = coshf_FR_M_temp
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r35 = coshf_FR_M_temp
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) and r36 = 0x3f, r35 ;;
+}
+
+// Calculate R
+// f13 = f44 - f12*f10 = x - M*log2by64_hi
+// f14 = f13 - f8*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 coshf_FR_R_temp = coshf_FR_M, coshf_FR_log2by64_hi, coshf_FR_X
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe coshf_FR_A2 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 coshf_FR_R = coshf_FR_M, coshf_FR_log2by64_lo, coshf_FR_R_temp
+ nop.i 999
+}
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0) ldfe coshf_FR_A3 = [r34],16 ;;
+(p0) ldfe coshf_FR_B1 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe coshf_FR_B2 = [r34],16 ;;
+(p0) ldfe coshf_FR_B3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl r34 = r36, 0x2 ;;
+(p0) sxt1 r37 = r34 ;;
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_Rsq = coshf_FR_R, coshf_FR_R, f0
+(p0) shr r36 = r37, 0x2 ;;
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0) sub r34 = r35, r36
+ nop.i 999 ;;
+(p0) shr r35 = r34, 0x6 ;;
+}
+
+{ .mii
+(p0) sub r40 = r38, r35
+(p0) adds r37 = 0x1, r35
+(p0) add r39 = r38, r35 ;;
+}
+
+// Get the address of the J table, add the offset,
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0) sub r34 = r35, r32
+(p0) addl r37 = @ltoff(single_coshf_j_table), gp
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ld8 r37 = [r37]
+(p0) fma.s1 coshf_FR_Rcub = coshf_FR_Rsq, coshf_FR_R, f0
+ nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p6 true if x < 32
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010004 ;;
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_peven_temp1 = coshf_FR_Rsq, coshf_FR_B3, coshf_FR_B2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_peven_temp2 = coshf_FR_Rsq, coshf_FR_peven_temp1, coshf_FR_B1
+ nop.i 999
+}
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_podd_temp1 = coshf_FR_Rsq, coshf_FR_A3, coshf_FR_A2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp coshf_FR_N_temp1 = r39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_peven = coshf_FR_Rsq, coshf_FR_peven_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_podd_temp2 = coshf_FR_Rsq, coshf_FR_podd_temp1, coshf_FR_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_podd = coshf_FR_podd_temp2, coshf_FR_Rcub, coshf_FR_R
+ nop.i 999
+}
+
+// GR_mJ contains the table offset for -j
+// GR_J contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0) setf.exp coshf_FR_N_temp2 = r40
+(p0) movl r40 = 0x0000000000000020 ;;
+}
+
+{ .mfi
+(p0) sub GR_mJ = r40, r36
+(p0) fmerge.se coshf_FR_spos = coshf_FR_N_temp1, f1
+(p0) adds GR_J = 0x20, r36 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl GR_mJ = GR_mJ, 5 ;;
+(p0) add AD_mJ = r37, GR_mJ ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) ldfe coshf_FR_Tmjhi = [AD_mJ],16
+(p0) shl GR_J = GR_J, 5 ;;
+}
+
+{ .mfi
+(p0) ldfs coshf_FR_Tmjlo = [AD_mJ],16
+(p0) fcmp.lt.unc.s1 p6,p7 = coshf_FR_X,f9
+(p0) add AD_J = r37, GR_J ;;
+}
+
+{ .mmi
+(p0) ldfe coshf_FR_Tjhi = [AD_J],16 ;;
+(p0) ldfs coshf_FR_Tjlo = [AD_J],16
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.se coshf_FR_sneg = coshf_FR_N_temp2, f1
+(p7) br.cond.spnt L(COSH_BY_EXP) ;;
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate C_hi
+// ******************************************************
+// coshf_FR_C_hi_temp = coshf_FR_sneg * coshf_FR_Tmjhi
+// coshf_FR_C_hi = coshf_FR_spos * coshf_FR_Tjhi + (coshf_FR_sneg * coshf_FR_Tmjhi)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_C_hi_temp = coshf_FR_sneg, coshf_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_C_hi = coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_C_hi_temp
+ nop.i 999
+}
+
+// ******************************************************
+// Calculate S_hi
+// ******************************************************
+// coshf_FR_S_hi_temp1 = coshf_FR_sneg * coshf_FR_Tmjhi
+// coshf_FR_S_hi = coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_S_hi_temp1
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_S_hi_temp1 = coshf_FR_sneg, coshf_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// Calculate C_lo
+// ******************************************************
+// coshf_FR_C_lo_temp1 = coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi
+// coshf_FR_C_lo_temp2 = coshf_FR_sneg * coshf_FR_Tmjhi + (coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi)
+// coshf_FR_C_lo_temp1 = coshf_FR_sneg * coshf_FR_Tmjlo
+// coshf_FR_C_lo_temp3 = coshf_FR_spos * coshf_FR_Tjlo + (coshf_FR_sneg * coshf_FR_Tmjlo)
+// coshf_FR_C_lo = coshf_FR_C_lo_temp3 + coshf_FR_C_lo_temp2
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 coshf_FR_C_lo_temp1 = coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_C_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 coshf_FR_S_hi = coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_S_hi_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_C_lo_temp2 = coshf_FR_sneg, coshf_FR_Tmjhi, coshf_FR_C_lo_temp1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_C_lo_temp1 = coshf_FR_sneg, coshf_FR_Tmjlo, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_C_lo_temp3 = coshf_FR_spos, coshf_FR_Tjlo, coshf_FR_C_lo_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_C_lo = coshf_FR_C_lo_temp3, f1, coshf_FR_C_lo_temp2
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// coshf_FR_Y_lo_temp = coshf_FR_C_hi * coshf_FR_peven + coshf_FR_C_lo
+// coshf_FR_Y_lo = coshf_FR_S_hi * coshf_FR_podd + coshf_FR_Y_lo_temp
+// coshf_FR_COSH = Y_hi + Y_lo
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_Y_lo_temp = coshf_FR_C_hi, coshf_FR_peven, coshf_FR_C_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_Y_lo = coshf_FR_S_hi, coshf_FR_podd, coshf_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s.s0 f8 = coshf_FR_C_hi, f1, coshf_FR_Y_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(COSH_BY_EXP):
+
+// When p7 is true, we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// f44 = Scale * (Y_hi + Y_lo)
+// = coshf_FR_spos * (coshf_FR_Tjhi + coshf_FR_Y_lo)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_Y_lo_temp = coshf_FR_peven, f1, coshf_FR_podd
+ nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow if N-1 > 0x3fe = 1022
+// There is a danger of single overflow if N-1 > 0x7e = 126
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000000007e ;;
+}
+
+{ .mfi
+(p0) cmp.gt.unc p0,p7 = r34, r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_Y_lo = coshf_FR_Tjhi, coshf_FR_Y_lo_temp, coshf_FR_Tjlo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_COSH_temp = coshf_FR_Y_lo, f1, coshf_FR_Tjhi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s0 f44 = coshf_FR_spos, coshf_FR_COSH_temp, f0
+ nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = f44,f44
+(p7) br.ret.sptk b0 ;;
+}
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s2 f43 = coshf_FR_spos, coshf_FR_COSH_temp, f0
+ nop.i 999 ;;
+}
+
+// 1 more than the exponent of the largest double (7FE) = 7FF
+// 7FF - 3FF = 400 (true); 400 + FFFF = 103FF (register-biased)
+// So 0 103FF 8000000000000000 is one ulp more than
+// largest double in register bias
+// 1 more than the exponent of the largest single (FE) = FF
+// FF - 7F = 80 (true); 80 + FFFF = 1007F (register-biased)
+// Now set p8 if the answer with WRE is greater than or equal this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000001007f ;;
+}
+
+{ .mmf
+ nop.m 999
+(p0) setf.exp f41 = r32
+(p0) fsetc.s2 0x7F,0x40 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f42 = f41, f41
+ nop.i 999 ;;
+}
+
+// The error tag for overflow is 65
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p8) mov GR_Parameter_TAG = 65 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
+(p8) br.cond.spnt __libm_error_region ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov GR_Parameter_TAG = 64
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt __libm_error_region ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f44,f44
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(COSH_HUGE):
+
+// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000015dbf ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 coshf_FR_hi_lo = f1, f9, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s0 f44 = f9, coshf_FR_hi_lo, f0
+(p0) mov GR_Parameter_TAG = 65
+}
+.endp coshf
+ASM_SIZE_DIRECTIVE(coshf)
+
+
+.proc __libm_error_region
+__libm_error_region: // Error exit: callers set GR_Parameter_TAG, then X, Y and RESULT are stored on the stack and __libm_error_support is called
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp - 32, i.e. new sp + 32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32, then advance the pointer to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point Parameter 2 back at sp+32
+ br.call.sptk.many b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the Parameter 3 address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S
new file mode 100644
index 0000000..97486f6
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_coshl.S
@@ -0,0 +1,1150 @@
+.file "coshl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 1/23/01 Set inexact flag for large args.
+//
+// API
+//==============================================================
+// float = coshf(float)
+// double = cosh(double)
+// long double = coshl(long double)
+// input floating point f8
+// output floating point f8
+
+
+// Overview of operation
+//==============================================================
+// There are four paths
+
+// 1. |x| < 0.25 COSH_BY_POLY
+// 2. |x| < 32 COSH_BY_TBL
+// 3. |x| < 2^14 COSH_BY_EXP
+// 4. |x| >= 2^14 COSH_HUGE
+
+// For paths 1, and 2 SAFE is always 1.
+// For path 4, Safe is always 0.
+// SAFE = 1 means we cannot overflow.
+
+#include "libm_support.h"
+
+// Assembly macros: symbolic register names. Registers are reused, so several names map to one register.
+//==============================================================
+cosh_FR_X = f44 // |x| (sign of f0 merged onto f8)
+FR_RESULT = f44 // same register as cosh_FR_X; the EXP path writes its result here
+cosh_FR_SGNX = f40 // sign of x merged onto 1.0
+cosh_FR_all_ones = f45 // significand set from cosh_GR_all_ones; squared to raise inexact
+
+FR_X = f8 // input argument and return value
+FR_Y = f0
+cosh_FR_Inv_log2by64 = f9 // 1st entry of double_cosh_arg_reduction
+cosh_FR_log2by64_lo = f11 // 3rd entry of double_cosh_arg_reduction
+cosh_FR_log2by64_hi = f10 // 2nd entry of double_cosh_arg_reduction
+
+cosh_FR_A1 = f9 // entries 1-3 of double_cosh_ab_table (p_odd coefficients)
+cosh_FR_A2 = f10
+cosh_FR_A3 = f11
+
+cosh_FR_Rcub = f12 // R*R*R
+cosh_FR_M_temp = f13 // M as an integer (fcvt.fx result)
+cosh_FR_R_temp = f13 // x - M*log2by64_hi
+cosh_FR_Rsq = f13 // R*R
+cosh_FR_R = f14 // reduced argument
+
+cosh_FR_M = f38 // M as a floating-point value
+
+cosh_FR_tmp = f15 // scratch destination of the dummy inexact multiply
+cosh_FR_B1 = f15 // entries 4-6 of double_cosh_ab_table (p_even coefficients)
+cosh_FR_B2 = f32
+cosh_FR_B3 = f33
+
+cosh_FR_peven_temp1 = f34 // B_2 + Rsq*B_3
+cosh_FR_peven_temp2 = f35 // B_1 + Rsq*peven_temp1
+cosh_FR_peven = f36 // Rsq*peven_temp2
+
+cosh_FR_podd_temp1 = f34 // A_2 + Rsq*A_3
+cosh_FR_podd_temp2 = f35 // A_1 + Rsq*podd_temp1
+cosh_FR_podd = f37 // R + Rcub*podd_temp2
+
+cosh_FR_J_temp = f9
+cosh_FR_J = f10
+
+cosh_FR_Mmj = f39
+
+cosh_FR_N_temp1 = f11 // exponent field 0xfffe + N
+cosh_FR_N_temp2 = f12 // exponent field 0xfffe - N
+cosh_FR_N = f13
+
+cosh_FR_spos = f14 // 2^(N-1)
+cosh_FR_sneg = f15 // 2^(-N-1)
+
+cosh_FR_Tjhi = f32 // T(j)_hi
+cosh_FR_Tjlo = f33 // T(j)_lo
+cosh_FR_Tmjhi = f34 // T(-j)_hi
+cosh_FR_Tmjlo = f35 // T(-j)_lo
+
+GR_mJ = r35 // table offset for -j
+GR_J = r36 // table offset for +j
+
+AD_mJ = r38 // address of the T(-j) row in double_cosh_j_table
+AD_J = r39 // address of the T(j) row in double_cosh_j_table
+
+cosh_GR_all_ones = r40 // -1
+
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+GR_Parameter_X = r44 // r44-r47 are the four output registers of the alloc
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+GR_Parameter_TAG = r47 // error tag, set before branching to the error region
+
+cosh_FR_C_hi = f9 // spos*Tjhi + sneg*Tmjhi
+cosh_FR_C_hi_temp = f10 // sneg*Tmjhi
+cosh_FR_C_lo_temp1 = f11
+cosh_FR_C_lo_temp2 = f12
+cosh_FR_C_lo_temp3 = f13
+
+cosh_FR_C_lo = f38 // C_lo_temp3 + C_lo_temp2
+cosh_FR_S_hi = f39 // spos*Tjhi - sneg*Tmjhi
+
+cosh_FR_S_hi_temp1 = f10 // sneg*Tmjhi
+cosh_FR_Y_hi = f11
+cosh_FR_Y_lo_temp = f12
+cosh_FR_Y_lo = f13
+cosh_FR_COSH = f9
+
+cosh_FR_X2 = f9 // |x|^2 (POLY path)
+cosh_FR_X4 = f10 // |x|^4 (POLY path)
+
+cosh_FR_P1 = f14 // entries 1-6 of double_cosh_p_table
+cosh_FR_P2 = f15
+cosh_FR_P3 = f32
+cosh_FR_P4 = f33
+cosh_FR_P5 = f34
+cosh_FR_P6 = f35
+
+cosh_FR_TINY_THRESH = f9
+
+cosh_FR_COSH_temp = f10 // Tjhi + Y_lo (EXP path, before scaling by spos)
+cosh_FR_SCALE = f11
+
+cosh_FR_hi_lo = f10
+
+cosh_FR_poly_podd_temp1 = f11 // X4*P5 + P3
+cosh_FR_poly_podd_temp2 = f13
+cosh_FR_poly_peven_temp1 = f11 // X4*P6 + P4
+cosh_FR_poly_peven_temp2 = f13 // X4*poly_peven_temp1 + P2
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+double_cosh_arg_reduction: // argument-reduction constants, loaded in order with ldfe (16-byte stride)
+ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
+ data8 0xB8AA3B295C17F0BC, 0x00004005 // Inv_log2by64
+ data8 0xB17217F7D1000000, 0x00003FF8 // log2by64_hi
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // log2by64_lo
+ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
+
+double_cosh_p_table: // polynomial coefficients for the COSH_BY_POLY path, loaded in order with ldfe
+ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
+ data8 0x8000000000000000, 0x00003FFE // P1
+ data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // P2
+ data8 0xB60B60B60B4FE884, 0x00003FF5 // P3
+ data8 0xD00D00D1021D7370, 0x00003FEF // P4
+ data8 0x93F27740C0C2F1CC, 0x00003FE9 // P5
+ data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // P6
+ASM_SIZE_DIRECTIVE(double_cosh_p_table)
+
+double_cosh_ab_table: // p_odd (A) and p_even (B) coefficients for the TBL and EXP paths, loaded in order with ldfe
+ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1
+ data8 0x88888888884ECDD5, 0x00003FF8 // A2
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3
+ data8 0x8000000000000002, 0x00003FFE // B1
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3
+ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
+
+double_cosh_j_table: // 32-byte rows indexed by j+32: T_hi read with ldfe at offset 0, T_lo read with ldfs at offset 16
+ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
+ data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // row 0 (j = -32)
+ data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+ data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+ data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+ data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+ data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+ data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+ data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+ data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+ data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+ data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+ data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+ data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+ data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+ data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+ data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+ data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+ data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+ data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+ data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+ data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+ data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // row 32 (j = 0)
+ data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+ data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+ data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+ data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+ data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+ data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+ data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+ data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+ data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+ data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+ data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+ data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+ data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+ data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+ data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+ data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+ data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+ data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+ data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+ data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+ data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+ data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+ data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+ data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+ data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+ data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+ data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // row 64 (j = 32)
+ASM_SIZE_DIRECTIVE(double_cosh_j_table)
+
+.align 32
+.global coshl#
+
+.section .text
+.proc coshl#
+.align 32
+
+coshl:
+
+#ifdef _LIBC
+.global __ieee754_coshl#
+.proc __ieee754_coshl#
+__ieee754_coshl:
+#endif
+
+// X NAN?
+
+{ .mfi
+ alloc r32 = ar.pfs,0,12,4,0
+(p0) fclass.m.unc p6,p7 = f8, 0xc3
+ mov cosh_GR_all_ones = -1
+};;
+
+// This is more than we need but it is in preparation
+// for the values we add for error support. We push three
+// addresses on the stack (3*8) = 24 bytes and one tag
+
+{ .mfb
+ nop.m 999
+(p6) fma.s0 f8 = f8,f1,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+
+// Make constant that will generate inexact when squared
+// X infinity
+{ .mfi
+ setf.sig cosh_FR_all_ones = cosh_GR_all_ones
+(p0) fclass.m.unc p6,p0 = f8, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fmerge.s f8 = f0,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+
+
+// Put 0.25 in f9; p6 true if x < 0.25
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000000fffd ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s cosh_FR_X = f0,f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s cosh_FR_SGNX = f8,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc p0,p7 = cosh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.sptk L(COSH_BY_TBL)
+}
+;;
+
+
+// COSH_BY_POLY:
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Get the values of P_x from the table
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_cosh_p_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+ nop.m 999
+(p0) ldfe cosh_FR_P1 = [r34],16
+(p0) fma.s1 cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_P2 = [r34],16 ;;
+(p0) ldfe cosh_FR_P3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_P4 = [r34],16 ;;
+(p0) ldfe cosh_FR_P5 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe cosh_FR_P6 = [r34],16
+(p0) fma.s1 cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0
+ nop.i 999 ;;
+}
+
+// Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1
+ nop.i 999
+}
+
+// Calculate cosh_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0
+ nop.i 999 ;;
+}
+
+// Y_lo = x2*p_odd + p_even
+// Calculate f8 = Y_hi + Y_lo
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_X2, cosh_FR_podd, cosh_FR_peven
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s0 f8 = f1, f1, cosh_FR_Y_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(COSH_BY_TBL):
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. The HUGE threshold depends on the precision:
+// Double Extended
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
+// This routine is double extended, so the threshold 1000d is used below.
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000001000d ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc p6,p7 = cosh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(COSH_HUGE) ;;
+}
+
+// r32 = 1
+// r34 = N-1
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// cosh(x) = cosh(B+R)
+// = cosh(B) cosh(R) + sinh(B) sinh(R)
+// cosh(R) can be approximated by 1 + p_even
+// sinh(R) can be approximated by p_odd
+
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9 = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0) adds r32 = 0x1,r0
+(p0) addl r34 = @ltoff(double_cosh_arg_reduction), gp
+ nop.i 999
+}
+;;
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1) = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+ ld8 r34 = [r34]
+(p0) movl r38 = 0x000000000000fffe ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_Inv_log2by64 = [r34],16 ;;
+(p0) ldfe cosh_FR_log2by64_hi = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mbb
+(p0) ldfe cosh_FR_log2by64_lo = [r34],16
+ nop.b 999
+ nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9 = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_cosh_ab_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// cosh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+// and the floating point representation of M in cosh_FR_M
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_M = cosh_FR_X, cosh_FR_Inv_log2by64, f0
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe cosh_FR_A1 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.fx.s1 cosh_FR_M_temp = cosh_FR_M
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnorm.s1 cosh_FR_M = cosh_FR_M_temp
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r35 = cosh_FR_M_temp
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) and r36 = 0x3f, r35 ;;
+}
+
+// Calculate R
+// f13 = f44 - f38*f10 = x - M*log2by64_hi
+// f14 = f13 - f38*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe cosh_FR_A2 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 cosh_FR_R = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp
+ nop.i 999
+}
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0) ldfe cosh_FR_A3 = [r34],16 ;;
+(p0) ldfe cosh_FR_B1 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_B2 = [r34],16 ;;
+(p0) ldfe cosh_FR_B3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl r34 = r36, 0x2 ;;
+(p0) sxt1 r37 = r34 ;;
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Rsq = cosh_FR_R, cosh_FR_R, f0
+(p0) shr r36 = r37, 0x2 ;;
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0) sub r34 = r35, r36
+ nop.i 999 ;;
+(p0) shr r35 = r34, 0x6 ;;
+}
+
+{ .mii
+(p0) sub r40 = r38, r35
+(p0) adds r37 = 0x1, r35
+(p0) add r39 = r38, r35 ;;
+}
+
+// Get the address of the J table, add the offset,
+// addresses are AD_mJ and AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0) sub r34 = r35, r32
+(p0) addl r37 = @ltoff(double_cosh_j_table), gp
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ld8 r37 = [r37]
+(p0) fma.s1 cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0
+ nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p6 true if x < 32
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010004 ;;
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3, cosh_FR_B2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1
+ nop.i 999
+}
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd_temp1 = cosh_FR_Rsq, cosh_FR_A3, cosh_FR_A2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp cosh_FR_N_temp1 = r39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_peven = cosh_FR_Rsq, cosh_FR_peven_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd_temp2 = cosh_FR_Rsq, cosh_FR_podd_temp1, cosh_FR_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_podd = cosh_FR_podd_temp2, cosh_FR_Rcub, cosh_FR_R
+ nop.i 999
+}
+
+// GR_mJ contains the table offset for -j
+// GR_J contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0) setf.exp cosh_FR_N_temp2 = r40
+(p0) movl r40 = 0x0000000000000020 ;;
+}
+
+{ .mfi
+(p0) sub GR_mJ = r40, r36
+(p0) fmerge.se cosh_FR_spos = cosh_FR_N_temp1, f1
+(p0) adds GR_J = 0x20, r36 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl GR_mJ = GR_mJ, 5 ;;
+(p0) add AD_mJ = r37, GR_mJ ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) ldfe cosh_FR_Tmjhi = [AD_mJ],16
+(p0) shl GR_J = GR_J, 5 ;;
+}
+
+{ .mfi
+(p0) ldfs cosh_FR_Tmjlo = [AD_mJ],16
+(p0) fcmp.lt.unc.s1 p6,p7 = cosh_FR_X,f9
+(p0) add AD_J = r37, GR_J ;;
+}
+
+{ .mmi
+(p0) ldfe cosh_FR_Tjhi = [AD_J],16 ;;
+(p0) ldfs cosh_FR_Tjlo = [AD_J],16
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.se cosh_FR_sneg = cosh_FR_N_temp2, f1
+(p7) br.cond.spnt L(COSH_BY_EXP) ;;
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate C_hi
+// ******************************************************
+// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi_temp
+ nop.i 999
+}
+
+// ******************************************************
+// Calculate S_hi
+// ******************************************************
+// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_S_hi_temp1 = cosh_FR_sneg, cosh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// Calculate C_lo
+// ******************************************************
+// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
+// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
+// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
+// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
+// cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 cosh_FR_S_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo_temp3 = cosh_FR_spos, cosh_FR_Tjlo, cosh_FR_C_lo_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_C_lo = cosh_FR_C_lo_temp3, f1, cosh_FR_C_lo_temp2
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
+// cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
+// cosh_FR_COSH = Y_hi + Y_lo
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s0 f8 = cosh_FR_C_hi, f1, cosh_FR_Y_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+L(COSH_BY_EXP):
+
+// When p7 is true, we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// f44 = Scale * (Y_hi + Y_lo)
+// = cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo_temp = cosh_FR_peven, f1, cosh_FR_podd
+ nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow if N-1 > 0x3ffe = 16382
+// There is a danger of double overflow if N-1 > 0x3fe = 1022
+// There is a danger of single overflow if N-1 > 0x7e = 126
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000003ffe ;;
+}
+
+{ .mfi
+(p0) cmp.gt.unc p0,p7 = r34, r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_Y_lo = cosh_FR_Tjhi, cosh_FR_Y_lo_temp, cosh_FR_Tjlo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_COSH_temp = cosh_FR_Y_lo, f1, cosh_FR_Tjhi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s0 f44 = cosh_FR_spos, cosh_FR_COSH_temp, f0
+ nop.i 999 ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p7) fmpy.s0 cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
+ nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = f44,f44
+(p7) br.ret.sptk b0 ;;
+}
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s2 f43 = cosh_FR_spos, cosh_FR_COSH_temp, f0
+ nop.i 999 ;;
+}
+
+// 103FF => 103FF -FFFF = 400(true)
+// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
+// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
+// largest double in register bias
+
+// 13FFF => 13FFF -FFFF = 4000(true)
+
+// Now set p8 if the answer with WRE is greater than or equal to this value
+// Also set p9 if the answer with WRE is less than or equal to the negative of this value
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000013fff ;;
+}
+
+{ .mmf
+ nop.m 999
+(p0) setf.exp f41 = r32
+(p0) fsetc.s2 0x7F,0x40 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f42 = f41, f41
+ nop.i 999 ;;
+}
+
+// The error tag for overflow is 63
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p8) mov GR_Parameter_TAG = 63 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
+(p8) br.cond.spnt __libm_error_region ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov GR_Parameter_TAG = 63
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt __libm_error_region ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f44,f44
+(p0) br.ret.sptk b0 ;;
+}
+
+
+// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+L(COSH_HUGE):
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000015dbf ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 cosh_FR_hi_lo = f1, f9, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s0 f44 = f9, cosh_FR_hi_lo, f0
+(p0) mov GR_Parameter_TAG = 63
+}
+.endp coshl
+ASM_SIZE_DIRECTIVE(coshl)
+
+.proc __libm_error_region // Error exit: spill FR_X, FR_Y, FR_RESULT to a 64-byte frame, call __libm_error_support, reload f8
+__libm_error_region: // Reached by branch or fall-through with GR_Parameter_TAG already set
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp - 32 (= new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 at sp+32; post-increment leaves sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller of the math routine
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S
new file mode 100644
index 0000000..06657b9
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_exp.S
@@ -0,0 +1,815 @@
+.file "exp.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 3/07/00 exp(inf) = inf but now does NOT call error support
+// exp(-inf) = 0 but now does NOT call error support
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/30/00 Reworked to shorten main path, widen main path to include all
+// args in normal range, and add quick exit for 0, nan, inf.
+// 12/05/00 Loaded constants earlier with setf to save 2 cycles.
+
+// API
+//==============================================================
+// double exp(double)
+
+// Overview of operation
+//==============================================================
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
+
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by series
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
+
+
+// Special values
+//==============================================================
+// exp(+0) = 1.0
+// exp(-0) = 1.0
+
+// exp(+qnan) = +qnan
+// exp(-qnan) = -qnan
+// exp(+snan) = +qnan
+// exp(-snan) = -qnan
+
+// exp(-inf) = +0
+// exp(+inf) = +inf
+
+// Overflow and Underflow
+//=======================
+// exp(x) = smallest double normal when
+// x = -708.396 = c086232bdd7abcd2
+
+// exp(x) = largest double normal when
+// x = 709.7827 = 40862e42fefa39ef
+
+
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f9 -> f15, f32 -> f60
+
+// General registers used:
+// r32 -> r60
+
+// Predicate registers used:
+// p6 -> p15
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+exp_GR_rshf = r33 // bit pattern of double 1.1000 * 2^63
+EXP_AD_TB1 = r34 // walks exp_table_1; ends up at table 1 proper
+EXP_AD_TB2 = r35 // address of table 2 (TB1 + exp_TB1_size)
+EXP_AD_P = r36 // address of polynomial coefficients (TB2 + exp_TB2_size)
+
+exp_GR_N = r37 // significand bits of EXP_W_2TO56_RSH (N in the low bits)
+exp_GR_index_1 = r38 // N & 0x0f
+exp_GR_index_2_16 = r39 // N & 0x70, i.e. index_2 * 16
+
+exp_GR_biased_M = r40 // (N >> 7) + 0xffff
+exp_GR_index_1_16 = r41 // not referenced by any instruction in this file
+EXP_AD_T1 = r42 // address of the selected table 1 entry
+EXP_AD_T2 = r43 // address of the selected table 2 entry
+exp_GR_sig_inv_ln2 = r44 // significand of 1/ln2
+
+exp_GR_17ones = r45 // 0x1FFFF
+exp_GR_one = r46 // 1, used as a tiny exponent field
+exp_TB1_size = r47 // 0x100 bytes
+exp_TB2_size = r48 // 0x80 bytes
+exp_GR_rshf_2to56 = r49 // bit pattern of double 1.1000 * 2^(63+56)
+
+exp_GR_gt_ln = r50 // 0x103ff, exponent of the overflow threshold
+exp_GR_exp_2tom56 = r51 // 0xffff-56, exponent field of 2^-56
+
+exp_GR_17ones_m1 = r52 // 0x1FFFF - 1, largest exponent field used to force overflow
+
+GR_SAVE_B0 = r53 // b0 saved across the error-support call
+GR_SAVE_PFS = r54 // ar.pfs saved across the error-support call
+GR_SAVE_GP = r55 // gp saved across the error-support call
+GR_SAVE_SP = r56 // not referenced by any instruction in this file
+
+GR_Parameter_X = r57 // address of stored x
+GR_Parameter_Y = r58 // address of stored second parameter
+GR_Parameter_RESULT = r59 // address of stored result
+GR_Parameter_TAG = r60 // error tag: 14 on overflow, 15 on underflow
+
+
+FR_X = f10 // copy of the input handed to error support
+FR_Y = f1 // second parameter slot; exp has only one argument
+FR_RESULT = f8 // result register
+
+EXP_RSHF_2TO56 = f6 // 1.1000 * 2^(63+56)
+EXP_INV_LN2_2TO63 = f7 // 1/ln2 * 2^63
+EXP_W_2TO56_RSH = f9 // x * 1/ln2 * 2^63 + EXP_RSHF_2TO56
+EXP_2TOM56 = f11 // 2^-56
+exp_P4 = f12 // polynomial coefficients loaded from exp_p_table
+exp_P3 = f13
+exp_P2 = f14
+exp_P1 = f15
+
+exp_ln2_by_128_hi = f33 // high part of ln2/128
+exp_ln2_by_128_lo = f34 // low part of ln2/128
+
+EXP_RSHF = f35 // 1.1000 * 2^63
+EXP_Nfloat = f36 // N as a floating-point number
+exp_W = f37 // not referenced by any instruction in this file
+exp_r = f38 // x - Nfloat * ln2_by_128_hi
+exp_f = f39 // 1 - Nfloat * ln2_by_128_lo
+
+exp_rsq = f40 // r^2
+exp_rcube = f41 // r^3
+
+EXP_2M = f42 // 2^M
+exp_S1 = f43 // 2^M * T1
+exp_T1 = f44 // table 1 entry
+
+EXP_MIN_DBL_OFLOW_ARG = f45 // range thresholds loaded from exp_table_1
+EXP_MAX_DBL_ZERO_ARG = f46
+EXP_MAX_DBL_NORM_ARG = f47
+EXP_MAX_DBL_UFLOW_ARG = f48
+EXP_MIN_DBL_NORM_ARG = f49
+exp_rP4pP3 = f50 // r * P4 + P3
+exp_P_lo = f51 // r * (r * P4 + P3) + P2
+exp_P_hi = f52 // r^2 * P1 + r
+exp_P = f53 // r^3 * P_lo + P_hi
+exp_S = f54 // S1 * S2
+
+EXP_NORM_f8 = f56 // normalized copy of the input
+
+exp_wre_urm_f8 = f57 // trial result with WRE set (possible-overflow path)
+exp_ftz_urm_f8 = f57 // trial result with ftz set (possible-underflow path); same register as above
+
+exp_gt_pln = f58 // overflow threshold built from exp_GR_gt_ln
+
+exp_S2 = f59 // f * T2
+exp_T2 = f60 // table 2 entry
+
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7) * 2^63 (same 2^63 scaling as 1/ln2)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is EXP_W_2TO56_RSH.
+// 2. EXP_RSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is EXP_Nfloat.
+
+
+exp_table_1: // Range thresholds, ln2/128 hi/lo, then the 16-entry 2^(index_1/128) table
+ASM_TYPE_DIRECTIVE(exp_table_1,@object)
+data8 0x40862e42fefa39f0 // smallest dbl overflow arg
+data8 0xc0874c0000000000 // approx largest arg for zero result
+data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result
+data8 0xc086232bdd7abcd3 // largest dbl underflow arg
+data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result
+data8 0x0 // pad
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+
+data8 0x8000000000000000 , 0x00003FFF // index_1 = 0
+data8 0x80B1ED4FD999AB6C , 0x00003FFF // index_1 = 1
+data8 0x8164D1F3BC030773 , 0x00003FFF // index_1 = 2
+data8 0x8218AF4373FC25EC , 0x00003FFF // index_1 = 3
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF // index_1 = 4
+data8 0x8383594EEFB6EE37 , 0x00003FFF // index_1 = 5
+data8 0x843A28C3ACDE4046 , 0x00003FFF // index_1 = 6
+data8 0x84F1F656379C1A29 , 0x00003FFF // index_1 = 7
+data8 0x85AAC367CC487B15 , 0x00003FFF // index_1 = 8
+data8 0x8664915B923FBA04 , 0x00003FFF // index_1 = 9
+data8 0x871F61969E8D1010 , 0x00003FFF // index_1 = 10
+data8 0x87DB357FF698D792 , 0x00003FFF // index_1 = 11
+data8 0x88980E8092DA8527 , 0x00003FFF // index_1 = 12
+data8 0x8955EE03618E5FDD , 0x00003FFF // index_1 = 13
+data8 0x8A14D575496EFD9A , 0x00003FFF // index_1 = 14
+data8 0x8AD4C6452C728924 , 0x00003FFF // index_1 = 15
+ASM_SIZE_DIRECTIVE(exp_table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+exp_table_2: // 8 entries of 16 bytes each (significand, exponent); 0x80 bytes in total
+ASM_TYPE_DIRECTIVE(exp_table_2,@object)
+data8 0x8000000000000000 , 0x00003FFF // index_2 = 0
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF // index_2 = 1
+data8 0x9837F0518DB8A96F , 0x00003FFF // index_2 = 2
+data8 0xA5FED6A9B15138EA , 0x00003FFF // index_2 = 3
+data8 0xB504F333F9DE6484 , 0x00003FFF // index_2 = 4
+data8 0xC5672A115506DADD , 0x00003FFF // index_2 = 5
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF // index_2 = 6
+data8 0xEAC0C6E7DD24392F , 0x00003FFF // index_2 = 7
+ASM_SIZE_DIRECTIVE (exp_table_2)
+
+
+exp_p_table: // Doubles used as P = r + P_1 r^2 + P_2 r^3 + P_3 r^4 + P_4 r^5; loaded pairwise with ldfpd
+ASM_TYPE_DIRECTIVE(exp_p_table,@object)
+data8 0x3f8111116da21757 //P_4 (coefficient of r^5, about 1/120)
+data8 0x3fa55555d787761c //P_3 (coefficient of r^4, about 1/24)
+data8 0x3fc5555555555414 //P_2 (coefficient of r^3, about 1/6)
+data8 0x3fdffffffffffd6a //P_1 (coefficient of r^2, about 1/2)
+ASM_SIZE_DIRECTIVE(exp_p_table)
+
+
+.align 32
+.global exp#
+
+.section .text
+.proc exp# // double exp(double): input and result in f8
+.align 32
+exp:
+#ifdef _LIBC
+.global __ieee754_exp#
+__ieee754_exp:
+#endif
+
+{ .mlx
+ alloc r32=ar.pfs,1,24,4,0 // 1 input, 24 locals (r33-r56), 4 outputs (r57-r60)
+ movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ addl EXP_AD_TB1 = @ltoff(exp_table_1), gp // linkage-table slot holding the table address
+ movl exp_GR_rshf_2to56 = 0x4768000000000000 ;; // 1.10000 2^(63+56)
+}
+;;
+
+// We do this fnorm right at the beginning to take any enabled
+// faults and to normalize any input unnormals so that SWA is not taken.
+{ .mfi
+ ld8 EXP_AD_TB1 = [EXP_AD_TB1] // fetch the address of exp_table_1
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ mov exp_GR_17ones = 0x1FFFF // used later on the certain-overflow path
+}
+{ .mfi
+ mov exp_TB1_size = 0x100 // byte size of table 1 (16 entries of 16 bytes)
+ fnorm EXP_NORM_f8 = f8
+ mov exp_GR_exp_2tom56 = 0xffff-56 // biased exponent of 2^-56
+}
+;;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
+
+{ .mmf
+ setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63
+ setf.d EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // Form const 1.100 * 2^(63+56)
+ fclass.m p9,p0 = f8,0x22 // Test for x=-inf
+}
+;;
+
+{ .mlx
+ setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // form 2^-56 for scaling Nfloat
+ movl exp_GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+{ .mfb
+ mov exp_TB2_size = 0x80 // byte size of table 2 (8 entries of 16 bytes)
+(p8) fma.d f8 = f1,f1,f0 // quick exit for x=0
+(p8) br.ret.spnt b0
+;;
+}
+
+{ .mfi
+ ldfpd EXP_MIN_DBL_OFLOW_ARG, EXP_MAX_DBL_ZERO_ARG = [EXP_AD_TB1],16 // first two thresholds
+ fclass.m p10,p0 = f8,0x21 // Test for x=+inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.d f8 = f0,f0,f0 // quick exit for x=-inf
+(p9) br.ret.spnt b0
+;;
+}
+
+{ .mmf
+ ldfpd EXP_MAX_DBL_NORM_ARG, EXP_MAX_DBL_UFLOW_ARG = [EXP_AD_TB1],16 // next two thresholds
+ setf.d EXP_RSHF = exp_GR_rshf // Form right shift const 1.100 * 2^63
+ fclass.m p11,p0 = f8,0xc3 // Test for x=nan
+;;
+}
+
+{ .mfb
+ ldfd EXP_MIN_DBL_NORM_ARG = [EXP_AD_TB1],16 // last threshold; the 16-byte step also skips the pad
+ nop.f 999
+(p10) br.ret.spnt b0 // quick exit for x=+inf
+;;
+}
+
+{ .mfi
+ ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16 // high part of ln2/128
+ nop.f 999
+ nop.i 999
+;;
+}
+
+
+{ .mfb
+ ldfe exp_ln2_by_128_lo = [EXP_AD_TB1],16 // low part of ln2/128
+(p11) fmerge.s f8 = EXP_NORM_f8, EXP_NORM_f8 // return the fnorm result for a nan input
+(p11) br.ret.spnt b0 // quick exit for x=nan
+;;
+}
+
+// After that last load, EXP_AD_TB1 points to the beginning of table 1
+
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
+
+{ .mfi
+ nop.m 999
+ fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8, EXP_INV_LN2_2TO63, EXP_RSHF_2TO56
+ nop.i 999
+;;
+}
+
+
+// Divide arguments into the following categories:
+// Certain Underflow/zero p11 - -inf < x <= MAX_DBL_ZERO_ARG
+// Certain Underflow p12 - MAX_DBL_ZERO_ARG < x <= MAX_DBL_UFLOW_ARG
+// Possible Underflow p13 - MAX_DBL_UFLOW_ARG < x < MIN_DBL_NORM_ARG
+// Certain Safe - MIN_DBL_NORM_ARG <= x <= MAX_DBL_NORM_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= x < +inf
+//
+// If the input is really a double arg, then there will never be "Possible
+// Underflow" or "Possible Overflow" arguments.
+//
+
+{ .mfi
+ add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1 // table 2 follows table 1
+ fcmp.ge.s1 p15,p14 = EXP_NORM_f8,EXP_MIN_DBL_OFLOW_ARG // p15 = certain overflow, p14 = complement
+ nop.i 999
+;;
+}
+
+{ .mfi
+ add EXP_AD_P = exp_TB2_size, EXP_AD_TB2 // coefficient table follows table 2
+ fcmp.le.s1 p11,p12 = EXP_NORM_f8,EXP_MAX_DBL_ZERO_ARG // p11 = underflow to zero, p12 = complement
+ nop.i 999
+;;
+}
+
+{ .mfb
+ ldfpd exp_P4, exp_P3 = [EXP_AD_P] ,16
+(p14) fcmp.gt.unc.s1 p14,p0 = EXP_NORM_f8,EXP_MAX_DBL_NORM_ARG // narrow p14 to possible overflow
+(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW)
+;;
+}
+
+
+// Nfloat = round_int(W)
+// The significand of EXP_W_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into exp_GR_N.
+
+// Since EXP_W_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield EXP_Nfloat.
+// Thus, EXP_Nfloat contains the floating point version of N
+// p13 is not defined on entry, so derive it here as the complement of the
+// certain-underflow test; the fcmp.lt below then narrows it to possible underflow.
+{ .mfi
+ nop.m 999
+(p12) fcmp.le.unc p12,p13 = EXP_NORM_f8,EXP_MAX_DBL_UFLOW_ARG // p12 = certain underflow, p13 = above that range
+ nop.i 999
+}
+{ .mfb
+ ldfpd exp_P2, exp_P1 = [EXP_AD_P]
+ fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF
+(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
+;;
+}
+
+{ .mfi
+ getf.sig exp_GR_N = EXP_W_2TO56_RSH
+(p13) fcmp.lt.unc p13,p0 = EXP_NORM_f8,EXP_MIN_DBL_NORM_ARG // p13 = possible underflow
+ nop.i 999
+;;
+}
+
+
+// exp_GR_index_1 has index_1
+// exp_GR_index_2_16 has index_2 * 16
+// exp_GR_biased_M has M + 0xffff
+// exp_GR_index_1_16 is not computed; shladd scales index_1 by 16 instead
+
+// r2 has true M
+{ .mfi
+ and exp_GR_index_1 = 0x0f, exp_GR_N // low 4 bits of N
+ fnma.s1 exp_r = EXP_Nfloat, exp_ln2_by_128_hi, EXP_NORM_f8 // r = x - Nfloat * ln2_by_128_hi
+ shr r2 = exp_GR_N, 0x7 // M = N >> 7
+}
+{ .mfi
+ and exp_GR_index_2_16 = 0x70, exp_GR_N // bits 4-6 of N, already scaled by 16
+ fnma.s1 exp_f = EXP_Nfloat, exp_ln2_by_128_lo, f1 // f = 1 - Nfloat * ln2_by_128_lo
+ nop.i 999
+;;
+}
+
+
+// EXP_AD_T1 has address of T1
+// EXP_AD_T2 has address of T2
+
+{ .mmi
+ addl exp_GR_biased_M = 0xffff, r2 // add the register-format exponent bias
+ add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16
+ shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1 // TB1 + index_1 * 16
+;;
+}
+
+
+// Create Scale = 2^M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
+
+{ .mmi
+ setf.exp EXP_2M = exp_GR_biased_M // 2^M
+ ldfe exp_T2 = [EXP_AD_T2]
+ nop.i 999
+;;
+}
+
+// Load T1 and T2
+{ .mfi
+ ldfe exp_T1 = [EXP_AD_T1]
+ nop.f 999
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_rsq = exp_r, exp_r, f0 // r^2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_rP4pP3 = exp_r, exp_P4, exp_P3 // r * P4 + P3
+ nop.i 999
+;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_rcube = exp_r, exp_rsq, f0 // r^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P_lo = exp_r, exp_rP4pP3, exp_P2 // r * (r * P4 + P3) + P2
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P_hi = exp_rsq, exp_P1, exp_r // r^2 * P1 + r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_S2 = exp_f,exp_T2,f0 // S2 = f * T2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_S1 = EXP_2M,exp_T1,f0 // S1 = 2^M * T1
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P = exp_rcube, exp_P_lo, exp_P_hi // P = r^3 * P_lo + P_hi
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_S = exp_S1,exp_S2,f0 // S = S1 * S2
+ nop.i 999
+;;
+}
+
+{ .bbb
+(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) // range predicates come from the classification above
+(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
+(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
+;;
+}
+
+
+{ .mfb
+ nop.m 999
+ fma.d f8 = exp_S, exp_P, exp_S // result = S * P + S, rounded to double
+ br.ret.sptk b0 ;; // Normal path exit
+}
+
+
+L(EXP_POSSIBLE_OVERFLOW):
+
+// We got an answer. EXP_MAX_DBL_NORM_ARG < x < EXP_MIN_DBL_OFLOW_ARG
+// overflow is a possibility, not a certainty
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x42 // set status field 2 for the WRE trial computation
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.d.s2 exp_wre_urm_f8 = exp_S, exp_P, exp_S // trial result under status field 2
+ nop.i 999 ;;
+}
+
+// We define an overflow when the answer with
+// WRE set
+// user-defined rounding mode
+// is ldn +1
+
+// Is the exponent 1 more than the largest double?
+// If so, go to ERROR RETURN, else get the answer and
+// leave.
+
+// Largest double is 7FE (biased double)
+// 7FE - 3FF + FFFF = 103FE
+// Create + largest_double_plus_ulp
+// Create - largest_double_plus_ulp
+// Calculate answer with WRE set.
+
+// Cases when answer is ldn+1 are as follows:
+// ldn ldn+1
+// --+----------|----------+------------
+// |
+// +inf +inf -inf
+// RN RN
+// RZ
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // put status field 2 back after the trial
+ mov exp_GR_gt_ln = 0x103ff ;; // one more than the 103FE exponent of the largest double
+}
+
+{ .mfi
+ setf.exp exp_gt_pln = exp_GR_gt_ln // threshold with exponent field 0x103ff
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.ge.unc.s1 p6, p0 = exp_wre_urm_f8, exp_gt_pln // p6 = trial result reached the threshold
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+(p6) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;; // Branch if really overflow
+}
+
+{ .mfb
+ nop.m 999
+ fma.d f8 = exp_S, exp_P, exp_S // final result with the default status field
+ br.ret.sptk b0 ;; // Exit if really no overflow
+}
+
+L(EXP_CERTAIN_OVERFLOW):
+{ .mmi
+ sub exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;; // 0x1FFFF - 0 - 1
+ setf.exp f9 = exp_GR_17ones_m1 // f9 gets that value as its exponent field
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // keep the original input for error support
+ nop.i 999
+}
+{ .mfb
+ mov GR_Parameter_TAG = 14 // tag passed to error support for exp overflow
+ fma.d FR_RESULT = f9, f9, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region ;;
+}
+
+L(EXP_POSSIBLE_UNDERFLOW):
+
+// We got an answer. EXP_MAX_DBL_UFLOW_ARG < x < EXP_MIN_DBL_NORM_ARG
+// underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x41 // set status field 2 for the ftz trial computation
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+ fma.d.s2 exp_ftz_urm_f8 = exp_S, exp_P, exp_S // trial result under status field 2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // put status field 2 back after the trial
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p6, p0 = exp_ftz_urm_f8, f0 // p6 = trial result is zero
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+ nop.f 999
+(p6) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) ;; // Branch if really underflow
+}
+{ .mfb
+ nop.m 999
+ fma.d f8 = exp_S, exp_P, exp_S // final result with the default status field
+ br.ret.sptk b0 ;; // Exit if really no underflow
+}
+
+L(EXP_CERTAIN_UNDERFLOW):
+{ .mfi
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // keep the original input for error support
+ nop.i 999
+}
+{ .mfb
+ mov GR_Parameter_TAG = 15 // tag passed to error support for exp underflow
+ fma.d FR_RESULT = exp_S, exp_P, exp_S // Set I,U and tiny result
+ br.cond.sptk __libm_error_region ;;
+}
+
+L(EXP_CERTAIN_UNDERFLOW_ZERO):
+{ .mmi
+ mov exp_GR_one = 1 ;;
+ setf.exp f9 = exp_GR_one // f9 gets exponent field 1, a tiny value
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // keep the original input for error support
+ nop.i 999
+}
+{ .mfb
+ mov GR_Parameter_TAG = 15 // tag passed to error support for exp underflow
+ fma.d FR_RESULT = f9, f9, f0 // Set I,U and tiny (+0.0) result
+ br.cond.sptk __libm_error_region ;;
+}
+
+.endp exp
+ASM_SIZE_DIRECTIVE(exp)
+
+
+.proc __libm_error_region // Error exit: spill FR_X, FR_Y, FR_RESULT to a 64-byte frame, call __libm_error_support, reload f8
+__libm_error_region: // Entered by branch with GR_Parameter_TAG already set (14 or 15)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp - 32 (= new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 at sp+32; post-increment leaves sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Reset the result address, overwritten during the call
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller of exp
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S
new file mode 100644
index 0000000..1288cb9
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_expf.S
@@ -0,0 +1,768 @@
+.file "expf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+// History
+//==============================================================
+// 4/04/00 Unwind update
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 8/21/00 Improvements to save 2 cycles on main path, and shorten x=0 case
+// 12/07/00 Widen main path, shorten x=inf, nan paths
+//
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+// integer registers used
+
+ exp_GR_0x0f = r33 // mask selecting the low nibble of (Mint+128)
+ exp_GR_0xf0 = r34 // mask selecting the high nibble of (Mint+128)
+
+ EXP_AD_P_1 = r36 // walking pointer over the coefficient tables, then base of exp_T2_table
+ EXP_AD_P_2 = r37 // not referenced by the expf code in this file
+ EXP_AD_T1 = r38 // base address of exp_T1_table
+ EXP_AD_T2 = r39 // base address of exp_T2_table
+ exp_GR_Mint = r40 // integer conversion of x (from fcvt.fx)
+
+ exp_GR_Mint_p_128 = r41 // Mint + 128
+ exp_GR_Ind1 = r42 // byte offset into T1
+ EXP_AD_M1 = r43 // address of the selected T1 entry
+ exp_GR_Ind2 = r44 // entry index into T2 (scaled by 16 with shladd)
+ EXP_AD_M2 = r45 // address of the selected T2 entry
+
+ exp_GR_min_oflow = r46 // single-precision bit patterns of the range thresholds
+ exp_GR_max_zero = r47
+ exp_GR_max_norm = r48
+ exp_GR_max_uflow = r49
+ exp_GR_min_norm = r50
+
+ exp_GR_17ones = r51 // 0x1FFFF
+ exp_GR_gt_ln = r52 // exponent field used to build lsn+1
+ exp_GR_T2_size = r53 // 0x100; shares r53 with GR_SAVE_B0 (only read while forming table addresses)
+
+ exp_GR_17ones_m1 = r56 // 0x1FFFE, exponent used to force overflow
+ exp_GR_one = r57 // exponent used to force underflow
+
+
+
+GR_SAVE_B0 = r53 // reuses r53 (exp_GR_T2_size); written only in __libm_error_region
+GR_SAVE_PFS = r55
+GR_SAVE_GP = r54
+
+GR_Parameter_X = r59 // r59-r62 are the four output registers of expf's alloc (1 in, 26 local, 4 out)
+GR_Parameter_Y = r60
+GR_Parameter_RESULT = r61
+GR_Parameter_TAG = r62
+
+FR_X = f10 // copy of the original argument for the error handler
+FR_Y = f1 // expf has one argument; f1 is what gets stored in the Y slot
+FR_RESULT = f8 // tentative result, also the return register
+
+
+// floating point registers used
+
+ EXP_MIN_SGL_OFLOW_ARG = f11 // range thresholds as floating-point values
+ EXP_MAX_SGL_ZERO_ARG = f12
+ EXP_MAX_SGL_NORM_ARG = f13
+ EXP_MAX_SGL_UFLOW_ARG = f14
+ EXP_MIN_SGL_NORM_ARG = f15
+
+ exp_coeff_P5 = f32 // polynomial coefficients p1..p6 loaded from the tables
+ exp_coeff_P6 = f33
+ exp_coeff_P3 = f34
+ exp_coeff_P4 = f35
+
+ exp_coeff_P1 = f36
+ exp_coeff_P2 = f37
+ exp_Mx = f38 // x converted to integer (in the significand)
+ exp_Mfloat = f39 // that integer converted back to floating point
+ exp_R = f40 // reduced argument x - Mfloat
+
+ exp_P1 = f41 // partial polynomial sums
+ exp_P2 = f42
+ exp_P3 = f43
+ exp_Rsq = f44 // R^2
+ exp_R4 = f45 // R^4
+
+ exp_P4 = f46
+ exp_P5 = f47
+ exp_P6 = f48
+ exp_P7 = f49 // 1 + R
+ exp_T1 = f50 // table value selected from exp_T1_table
+
+ exp_T2 = f51 // table value selected from exp_T2_table
+ exp_T = f52 // T1 * T2
+ exp_A = f53 // T * (1 + R)
+ exp_norm_f8 = f54 // normalized copy of the input
+ exp_wre_urm_f8 = f55 // result computed in s2 on the possible-overflow path
+
+ exp_ftz_urm_f8 = f56 // result computed in s2 on the possible-underflow path
+ exp_gt_pln = f57 // lsn+1 comparison value
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+exp_coeff_1_table: // expf reads p5,p6 / p1,p2 / p3,p4 with one pointer advanced 16 bytes per load, so the tables below must stay contiguous
+ASM_TYPE_DIRECTIVE(exp_coeff_1_table,@object)
+data8 0x3F56F35FDE4F8563 // p5 (close to 1/720)
+data8 0x3F2A378BEFECCFDD // p6 (close to 1/5040)
+data8 0x3FE00000258C581D // p1 (close to 1/2)
+data8 0x3FC555557AE7B3D4 // p2 (close to 1/6)
+ASM_SIZE_DIRECTIVE(exp_coeff_1_table)
+
+
+exp_coeff_2_table: // third 16-byte pair read by the same walking pointer
+ASM_TYPE_DIRECTIVE(exp_coeff_2_table,@object)
+data8 0x3FA5551BB6592FAE // p3 (close to 1/24)
+data8 0x3F8110E8EBFFD485 // p4 (close to 1/120)
+ASM_SIZE_DIRECTIVE(exp_coeff_2_table)
+
+
+exp_T2_table: // 16 entries x 16 bytes = 0x100 bytes (matches exp_GR_T2_size); indexed by the low nibble of Mint+128
+ASM_TYPE_DIRECTIVE(exp_T2_table,@object)
+data8 0xa175cf9cd7d85844 , 0x00003f46 // exp(-128)
+data8 0xdb7279415a1f9eed , 0x00003f47 // exp(-127)
+data8 0x95213b242bd8ca5f , 0x00003f49 // exp(-126)
+data8 0xcab03c968c989f83 , 0x00003f4a // exp(-125)
+data8 0x89bdb674702961ad , 0x00003f4c // exp(-124)
+data8 0xbb35a2eec278be35 , 0x00003f4d // exp(-123)
+data8 0xfe71b17f373e7e7a , 0x00003f4e // exp(-122)
+data8 0xace9a6ec52a39b63 , 0x00003f50 // exp(-121)
+data8 0xeb03423fe393cf1c , 0x00003f51 // exp(-120)
+data8 0x9fb52c5bcaef1693 , 0x00003f53 // exp(-119)
+data8 0xd910b6377ed60bf1 , 0x00003f54 // exp(-118)
+data8 0x9382dad8a9fdbfe4 , 0x00003f56 // exp(-117)
+data8 0xc87d0a84dea869a3 , 0x00003f57 // exp(-116)
+data8 0x883efb4c6d1087b0 , 0x00003f59 // exp(-115)
+data8 0xb92d7373dce9a502 , 0x00003f5a // exp(-114)
+data8 0xfbaeb020577fb0cb , 0x00003f5b // exp(-113)
+ASM_SIZE_DIRECTIVE(exp_T2_table)
+
+
+exp_T1_table: // reached as exp_T2_table + 0x100; indexed by the high nibble of Mint+128 (byte offset = nibble * 16)
+ASM_TYPE_DIRECTIVE(exp_T1_table,@object)
+data8 0x8000000000000000 , 0x00003fff // exp(16 * 0)
+data8 0x87975e8540010249 , 0x00004016 // exp(16 * 1)
+data8 0x8fa1fe625b3163ec , 0x0000402d // exp(16 * 2)
+data8 0x9826b576512a59d7 , 0x00004044 // exp(16 * 3)
+data8 0xa12cc167acbe6902 , 0x0000405b // exp(16 * 4)
+data8 0xaabbcdcc279f59e4 , 0x00004072 // exp(16 * 5)
+data8 0xb4dbfaadc045d16f , 0x00004089 // exp(16 * 6)
+data8 0xbf95e372ccdbf146 , 0x000040a0 // exp(16 * 7)
+data8 0xcaf2a62eea10bbfb , 0x000040b7 // exp(16 * 8)
+data8 0xd6fbeb62fddbd340 , 0x000040ce // exp(16 * 9)
+data8 0xe3bbee32e4a440ea , 0x000040e5 // exp(16 * 10)
+data8 0xf13d8517c34199a8 , 0x000040fc // exp(16 * 11)
+data8 0xff8c2b166241eedd , 0x00004113 // exp(16 * 12)
+data8 0x875a04c0b38d6129 , 0x0000412b // exp(16 * 13)
+data8 0x8f610127db6774d7 , 0x00004142 // exp(16 * 14)
+data8 0x97e1dd87e5c20bb6 , 0x00004159 // exp(16 * 15)
+ASM_SIZE_DIRECTIVE(exp_T1_table)
+
+// Argument Reduction
+// exp_Mx = (int)f8 ==> The value of f8 rounded to int is placed into the
+// significand of exp_Mx as a two's
+// complement number.
+
+// Later we want to have exp_Mx in a general register. Do this with a getf.sig
+// and call the general register exp_GR_Mint
+
+// exp_Mfloat = (float)(int)f8 ==> the two's complement number in
+// significand of exp_Mx is turned
+// into a floating point number.
+// R = f8 - exp_Mfloat ==> reduced argument
+
+// Core Approximation
+// Calculate a series in R
+// R * p6 + p5
+// R * p4 + p3
+// R * p2 + p1
+// R^2
+// R^4
+// R^2(R * p6 + p5) + (R * p4 + p3)
+// R^2(R * p2 + p1)
+// R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
+// R + 1
+// exp(R) = (1 + R) + R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
+// exp(R) = 1 + R + R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6
+
+// Reconstruction
+// significand of exp_Mx is two's complement,
+// -103 < x < 89
+// The smallest single denormal is 2^-149 = ssdn
+// For e^x = ssdn
+// x = log(ssdn) = -103.279
+// But with rounding result goes to ssdn until -103.972079
+// The largest single normal is 1.<23 1's> 2^127 ~ 2^128 = lsn
+// For e^x = lsn
+// x = log(lsn) = 88.7228
+//
+// expf overflows when x > 42b17218 = 88.7228
+// expf returns largest single denormal when x = c2aeac50
+// expf goes to zero when x < c2cff1b5
+
+// Consider range of 8-bit two's complement, -128 ---> 127
+// Add 128; range becomes 0 ---> 255
+
+// The number (=i) in 0 ---> 255 is used as offset into two tables.
+
+// i = abcd efgh = abcd * 16 + efgh = i1 * 16 + i2
+
+// i1 = (exp_GR_Mint + 128) & 0xf0 (show 0xf0 as -0x10 to avoid assembler error)
+// (The immediate in the AND is an 8-bit two's complement)
+// i1 = i1 + start of T1 table (EXP_AD_T1)
+// Note that the entries in T1 are double-extended numbers on 16-byte boundaries
+// and that i1 is already shifted left by 4 (i.e. multiplied by 16) after the AND.
+
+// i2 must be shifted left by 4 before adding to the start of the table.
+// i2 = ((exp_GR_Mint + 128) & 0x0f) << 4
+// i2 = i2 + start of T2 table (EXP_AD_T2)
+
+// T = T1 * T2
+// A = T * (1 + R)
+// answer = T * (R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6) +
+// T * (1 + R)
+// = T * exp(R)
+
+
+.global expf#
+
+.section .text
+.proc expf# // float expf(float): argument arrives in f8, result is returned in f8
+.align 32
+expf:
+#ifdef _LIBC
+.global __ieee754_expf#
+__ieee754_expf:
+#endif
+
+{ .mfi
+ alloc r32 = ar.pfs,1,26,4,0 // 1 input (r32), 26 locals (r33-r58), 4 outputs (r59-r62)
+ fcvt.fx.s1 exp_Mx = f8 // Mx = x converted to integer (kept in the significand)
+ mov exp_GR_17ones = 0x1FFFF
+}
+{ .mlx
+ addl EXP_AD_P_1 = @ltoff(exp_coeff_1_table),gp // address of the linkage-table slot for the coefficient table
+ movl exp_GR_min_oflow = 0x42b17218 // bit pattern of the smallest overflowing argument
+}
+;;
+
+// Fnorm done to take any enabled faults
+{ .mfi
+ ld8 EXP_AD_P_1 = [EXP_AD_P_1] // actual address of exp_coeff_1_table
+ fclass.m p6,p0 = f8, 0x07 //@zero
+ nop.i 999
+}
+{ .mfi
+ add exp_GR_max_norm = -1, exp_GR_min_oflow // 0x42b17217
+ fnorm exp_norm_f8 = f8
+ nop.i 999
+}
+;;
+
+{ .mfi
+ setf.s EXP_MIN_SGL_OFLOW_ARG = exp_GR_min_oflow // 0x42b17218
+ fclass.m p7,p0 = f8, 0x22 // Test for x=-inf
+ mov exp_GR_0xf0 = 0x0f0
+}
+{ .mlx
+ setf.s EXP_MAX_SGL_NORM_ARG = exp_GR_max_norm
+ movl exp_GR_max_zero = 0xc2cff1b5 // bit pattern of the largest argument whose result is zero
+}
+;;
+
+
+{ .mlx
+ mov exp_GR_0x0f = 0x00f
+ movl exp_GR_max_uflow = 0xc2aeac50 // bit pattern of the largest argument that certainly underflows
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s f8 = f1,f1,f0 // exp(+-0) = 1
+(p6) br.ret.spnt b0 // quick exit for x=0
+}
+;;
+
+{ .mfi
+ setf.s EXP_MAX_SGL_ZERO_ARG = exp_GR_max_zero
+ fclass.m p8,p0 = f8, 0x21 // Test for x=+inf
+ adds exp_GR_min_norm = 1, exp_GR_max_uflow // 0xc2aeac51
+}
+{ .mfb
+ ldfpd exp_coeff_P5,exp_coeff_P6 = [EXP_AD_P_1],16
+(p7) fma.s f8 = f0,f0,f0 // exp(-inf) = +0
+(p7) br.ret.spnt b0 // quick exit for x=-inf
+}
+;;
+
+{ .mmf
+ ldfpd exp_coeff_P1,exp_coeff_P2 = [EXP_AD_P_1],16
+ setf.s EXP_MAX_SGL_UFLOW_ARG = exp_GR_max_uflow
+ fclass.m p9,p0 = f8, 0xc3 // Test for x=nan
+}
+;;
+
+{ .mmb
+ ldfpd exp_coeff_P3,exp_coeff_P4 = [EXP_AD_P_1],16
+ setf.s EXP_MIN_SGL_NORM_ARG = exp_GR_min_norm
+(p8) br.ret.spnt b0 // quick exit for x=+inf
+}
+;;
+
+// EXP_AD_P_1 now points to exp_T2_table
+{ .mfi
+ mov exp_GR_T2_size = 0x100 // byte size of exp_T2_table (16 entries of 16 bytes)
+ fcvt.xf exp_Mfloat = exp_Mx // Mfloat = (float)(int)x
+ nop.i 999
+}
+;;
+
+{ .mfb
+ getf.sig exp_GR_Mint = exp_Mx // integer part of x into a general register
+(p9) fmerge.s f8 = exp_norm_f8, exp_norm_f8 // return the fnorm'ed input for NaN
+(p9) br.ret.spnt b0 // quick exit for x=nan
+}
+;;
+
+{ .mmi
+ nop.m 999
+ mov EXP_AD_T2 = EXP_AD_P_1 // base of exp_T2_table
+ add EXP_AD_T1 = exp_GR_T2_size,EXP_AD_P_1 ;; // base of exp_T1_table = T2 base + 0x100
+}
+
+
+{ .mmi
+ adds exp_GR_Mint_p_128 = 0x80,exp_GR_Mint ;; // bias the integer part by 128
+ and exp_GR_Ind1 = exp_GR_Mint_p_128, exp_GR_0xf0 // high nibble, already a byte offset into T1
+ and exp_GR_Ind2 = exp_GR_Mint_p_128, exp_GR_0x0f ;; // low nibble, entry index into T2
+}
+
+// Divide arguments into the following categories:
+// Certain Underflow/zero p11 - -inf < x <= MAX_SGL_ZERO_ARG
+// Certain Underflow p12 - MAX_SGL_ZERO_ARG < x <= MAX_SGL_UFLOW_ARG
+// Possible Underflow p13 - MAX_SGL_UFLOW_ARG < x < MIN_SGL_NORM_ARG
+// Certain Safe - MIN_SGL_NORM_ARG <= x <= MAX_SGL_NORM_ARG
+// Possible Overflow p14 - MAX_SGL_NORM_ARG < x < MIN_SGL_OFLOW_ARG
+// Certain Overflow p15 - MIN_SGL_OFLOW_ARG <= x < +inf
+//
+// If the input is really a single arg, then there will never be "Possible
+// Underflow" or "Possible Overflow" arguments.
+//
+
+{ .mfi
+ add EXP_AD_M1 = exp_GR_Ind1,EXP_AD_T1 // address of the T1 entry
+ fcmp.ge.s1 p15,p14 = exp_norm_f8,EXP_MIN_SGL_OFLOW_ARG // p15 = certain overflow, p14 = its complement
+ nop.i 999
+}
+{ .mfi
+ shladd EXP_AD_M2 = exp_GR_Ind2,4,EXP_AD_T2 // address of the T2 entry (index * 16 + base)
+ fms.s1 exp_R = f1,f8,exp_Mfloat // R = x - Mfloat
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ldfe exp_T1 = [EXP_AD_M1]
+ fcmp.le.s1 p11,p12 = exp_norm_f8,EXP_MAX_SGL_ZERO_ARG // p11 = result is zero, p12 = its complement
+ nop.i 999 ;;
+}
+
+{ .mfb
+ ldfe exp_T2 = [EXP_AD_M2]
+(p14) fcmp.gt.s1 p14,p0 = exp_norm_f8,EXP_MAX_SGL_NORM_ARG // refine p14 to "possible overflow"
+(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) fcmp.le.s1 p12,p13 = exp_norm_f8,EXP_MAX_SGL_UFLOW_ARG // p12 = certain underflow; p13 = complement, so p13 no longer depends on the caller's predicate state
+(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p13) fcmp.lt.s1 p13,p0 = exp_norm_f8,EXP_MIN_SGL_NORM_ARG // refine p13 to "possible underflow"
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_Rsq = exp_R,exp_R,f0 // Rsq = R^2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P3 = exp_R,exp_coeff_P2,exp_coeff_P1 // P3 = R*p2 + p1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P1 = exp_R,exp_coeff_P6,exp_coeff_P5 // P1 = R*p6 + p5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P2 = exp_R,exp_coeff_P4,exp_coeff_P3 // P2 = R*p4 + p3
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P7 = f1,exp_R,f1 // P7 = 1 + R
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P5 = exp_Rsq,exp_P3,f0 // P5 = R^2 * (R*p2 + p1)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_R4 = exp_Rsq,exp_Rsq,f0 // R4 = R^4
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_T = exp_T1,exp_T2,f0 // T = T1 * T2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P4 = exp_Rsq,exp_P1,exp_P2 // P4 = R^2*(R*p6 + p5) + (R*p4 + p3)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 exp_A = exp_T,exp_P7,f0 // A = T * (1 + R)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 exp_P6 = exp_R4,exp_P4,exp_P5 // P6 = R^4*P4 + P5 (all terms from R^2 upward)
+ nop.i 999
+}
+;;
+
+{ .bbb
+(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) // boundary cases finish the computation on their own paths
+(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
+(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fma.s f8 = exp_T,exp_P6,exp_A // result = T*P6 + A, rounded to single
+ br.ret.sptk b0
+}
+;;
+
+L(EXP_POSSIBLE_OVERFLOW):
+
+// We got an answer. EXP_MAX_SGL_NORM_ARG < x < EXP_MIN_SGL_OFLOW_ARG
+// overflow is a possibility, not a certainty
+// Set wre in s2 and perform the last operation with s2
+
+// We define an overflow when the answer with
+// WRE set
+// user-defined rounding mode
+// is lsn +1
+
+// Is the exponent 1 more than the largest single?
+// If so, go to ERROR RETURN, else (no overflow) get the answer and
+// leave.
+
+// Largest single is FE (biased single)
+// FE - 7F + FFFF = 1007E
+
+// Create + largest_single_plus_ulp
+// Create - largest_single_plus_ulp
+
+// Calculate answer with WRE set.
+
+// Cases when answer is lsn+1 are as follows:
+
+// midpoint
+// |
+// lsn | lsn+1
+// --+----------|----------+------------
+// |
+// +inf +inf -inf
+// RN RN
+// RZ
+// exp_gt_pln contains the floating point number lsn+1.
+// The setf.exp puts 0x1007f in the exponent and 0x800... in the significand.
+
+// If the answer is >= lsn+1, we have overflowed.
+// Then p6 is TRUE. Set the overflow tag, save input in FR_X,
+// do the final calculation for IEEE result, and branch to error return.
+
+{ .mfi
+ mov exp_GR_gt_ln = 0x1007F // exponent field one above the largest single exponent (0x1007E)
+ fsetc.s2 0x7F,0x42 // enable wre in status field s2 (per the comment above)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ setf.exp exp_gt_pln = exp_GR_gt_ln // exp_gt_pln = lsn+1
+ fma.s.s2 exp_wre_urm_f8 = exp_T, exp_P6, exp_A // trial result computed under s2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // NOTE(review): presumably puts s2 back to its state without wre - confirm against the fsetc encoding
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.ge.unc.s1 p6, p0 = exp_wre_urm_f8, exp_gt_pln // p6 = trial result reached lsn+1
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+(p6) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) // Branch if really overflow
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fma.s f8 = exp_T, exp_P6, exp_A // same final operation as the main path
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+L(EXP_CERTAIN_OVERFLOW): // Build an overflowing product, then hand off to the error region with tag 16
+{ .mmi
+ sub exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;; // 0x1FFFF - 0 - 1 = 0x1FFFE
+ setf.exp f9 = exp_GR_17ones_m1 // f9 gets exponent field 0x1FFFE
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // keep the original argument for the error handler
+ nop.i 999
+}
+{ .mfb
+ mov GR_Parameter_TAG = 16 // tag value passed on to __libm_error_support for this case
+ fma.s FR_RESULT = f9, f9, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region ;;
+}
+
+L(EXP_POSSIBLE_UNDERFLOW):
+
+// We got an answer. EXP_MAX_SGL_UFLOW_ARG < x < EXP_MIN_SGL_NORM_ARG
+// underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-7f 1.1...11 2^-7f 1.0...00 2^-7e
+// 0.1...11 2^-7e (biased, 1)
+// largest dn smallest normal
+
+// If the answer is = 0, we have underflowed.
+// Then p6 is TRUE. Set the underflow tag, save input in FR_X,
+// do the final calculation for IEEE result, and branch to error return.
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x41 // enable ftz in status field s2 (per the comment above)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s.s2 exp_ftz_urm_f8 = exp_T, exp_P6, exp_A // trial result computed under s2
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // NOTE(review): presumably puts s2 back to its state without ftz - confirm against the fsetc encoding
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p6, p0 = exp_ftz_urm_f8, f0 // p6 = trial result was flushed to zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+(p6) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) // Branch if really underflow
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fma.s f8 = exp_T, exp_P6, exp_A // same final operation as the main path
+ br.ret.sptk b0 // Exit if really no underflow
+}
+;;
+
+L(EXP_CERTAIN_UNDERFLOW): // Compute the tiny result normally, then hand off to the error region with tag 17
+{ .mfi
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // keep the original argument for the error handler
+ nop.i 999
+}
+{ .mfb
+ mov GR_Parameter_TAG = 17 // tag value passed on to __libm_error_support for this case
+ fma.s FR_RESULT = exp_T, exp_P6, exp_A // Set I,U and tiny result
+ br.cond.sptk __libm_error_region ;;
+}
+
+L(EXP_CERTAIN_UNDERFLOW_ZERO): // Build an underflowing product, then hand off to the error region with tag 17
+{ .mmi
+ mov exp_GR_one = 1 ;;
+ setf.exp f9 = exp_GR_one // f9 gets exponent field 1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s FR_X = f8,f8 // keep the original argument for the error handler
+ nop.i 999
+}
+{ .mfb
+ mov GR_Parameter_TAG = 17 // same tag as the non-zero underflow case
+ fma.s FR_RESULT = f9, f9, f0 // Set I,U and tiny (+0.0) result
+ br.cond.sptk __libm_error_region ;;
+}
+
+.endp expf
+ASM_SIZE_DIRECTIVE(expf)
+
+
+.proc __libm_error_region // Shared error exit for expf: spill X, Y and the tentative result as floats, call __libm_error_support
+__libm_error_region: // Entered by branch with GR_Parameter_TAG, FR_X and FR_RESULT already set
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32 once the frame is created
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack; post-increment leaves the pointer at new sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (new sp+48)
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (new sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call (see 8/15/00 history note)
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller of expf
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_expl.c b/sysdeps/ia64/fpu/e_expl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_expl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S
new file mode 100644
index 0000000..ae641f4
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_fmod.S
@@ -0,0 +1,538 @@
+.file "fmod.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//====================================================================
+// 2/02/00 Initial version
+// 3/02/00 New Algorithm
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//11/28/00 Set FR_Y to f9
+//
+// API
+//====================================================================
+// double fmod(double,double);
+//
+// Overview of operation
+//====================================================================
+// fmod(a,b)=a-i*b,
+// where i is an integer such that, if b!=0,
+// |i|<=|a/b| and |a/b-i|<1
+//
+// Algorithm
+//====================================================================
+// a). if |a|<|b|, return a
+// b). get quotient and reciprocal overestimates accurate to
+// 33 bits (q2,y2)
+// c). if the exponent difference (exponent(a)-exponent(b))
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
+// round quotient estimate to single precision (k=RN(q2)),
+// calculate partial remainder (a'=a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
+//
+// Special cases
+//====================================================================
+// b=+/-0: return NaN, call libm_error_support
+// a=+/-Inf, a=NaN or b=NaN: return NaN
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p11
+// General registers: r2, r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15
+
+#include "libm_support.h"
+
+.section .text
+
+
+GR_SAVE_B0 = r33 // r33-r36 are the four locals of fmod's alloc (1 in, 4 local, 4 out)
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36 // defined but not referenced by the code in this file
+
+GR_Parameter_X = r37 // r37-r40 are the four output registers
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10 // copy of the first argument for the error handler
+FR_Y = f9 // second argument (see 11/28/00 history note)
+FR_RESULT = f8 // tentative result, also the return register
+
+
+.proc fmod# // double fmod(double a, double b): a in f8, b in f9, result in f8
+.align 32
+.global fmod#
+.align 32
+
+fmod:
+#ifdef _LIBC
+.global __ieee754_fmod
+.type __ieee754_fmod,@function
+__ieee754_fmod:
+#endif
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0 // 1 input (r32), 4 locals (r33-r36), 4 outputs (r37-r40)
+ // f6=|a|
+ fmerge.s f6=f0,f8
+ mov r2 = 0x0ffdd // biased exponent of 2^-34, used in step (5)
+}
+ {.mfi
+ nop.m 0
+ // f7=|b|
+ fmerge.s f7=f0,f9
+ nop.i 0;;
+}
+
+{ .mfi
+ setf.exp f11 = r2 // f11 = 2^-34
+ // (1) y0
+ frcpa.s1 f10,p6=f6,f7 // y0 ~ 1/|b|; p6 guards the refinement steps below
+ nop.i 0
+}
+
+// Y +-NAN, +-inf, +-0? p7
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0xe7
+ nop.i 999;;
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999
+}
+
+// |x| < |y|? Return x p8
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 0
+ // normalize y (if |x|<|y|)
+ (p8) fma.s0 f9=f9,f1,f0
+ nop.i 0;;
+}
+
+ { .mfi
+ mov r2=0x1001f // biased exponent of 2^32
+ // (2) q0=a*y0
+ (p6) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 f12=f7,f10,f1
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x (if |x|<|y|)
+ (p8) fma.d.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.bbb
+ (p9) br.cond.spnt L(FMOD_X_NAN_INF)
+ (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
+ // if |x|<|y|, return
+ (p8) br.ret.spnt b0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x
+ fma.s0 f6=f6,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // normalize y
+ fma.s0 f7=f7,f1,f0
+ nop.i 0;;
+}
+
+ {.mfi
+ // f15=2^32
+ setf.exp f15=r2
+ // (4) q1=q0+e0*q0
+ (p6) fma.s1 f13=f12,f13,f13
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 f14=f12,f12,f11
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ movl r2=0x33a00000;; // single-precision bit pattern of 1.25*2^-24
+}
+{ .mfi
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 f10=f12,f10,f10
+ nop.i 0;;
+}
+{.mfi
+ // set f12=1.25*2^{-24}
+ setf.s f12=r2
+ // (7) q2=q1+e1*q1
+ (p6) fma.s1 f13=f13,f14,f13
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fmerge.s f9=f8,f9 // f9 = |b| with the sign of a (used for the final correction)
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 f10=f14,f10,f10
+ // set p6=0, p10=0
+ cmp.ne.and p6,p10=r0,r0;;
+}
+
+.align 32
+L(loop53): // One quotient/remainder step per pass; p8 marks the final pass (q2 < 2^32), p7 a partial-remainder pass
+ {.mfi
+ nop.m 0
+ // compare q2, 2^32
+ fcmp.lt.unc.s1 p8,p7=f13,f15
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 f11=f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // if exponent>=32, round quotient to single precision (perform in advance)
+ fma.s.s1 f13=f13,f1,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // set f12=sgn(a)
+ (p8) fmerge.s f12=f8,f1
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf f13=f11
+ nop.i 0;;
+}
+ { .mfi
+ nop.m 0
+ // calculate remainder (assuming f13=RZ(Q))
+ (p7) fnma.s1 f14=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // also if exponent>=32, round quotient to single precision
+ // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 f11=f13,f12,f13
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 f11=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // (p7) calculate remainder (assuming f11=RZ(Q))
+ (p7) fnma.s1 f6=f11,f7,f6
+ nop.i 0;;
+}
+
+
+ {.mfi
+ nop.m 0
+ // Final iteration (p8): is f11 the correct remainder (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 f13=f14,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // was f14=RZ(Q) ? (then new remainder f14>=0)
+ (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
+ nop.b 0;;
+}
+
+
+.pred.rel "mutex",p6,p10
+ {.mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
+ (p6) fma.d.s0 f8=f11,f12,f9
+ nop.b 0
+}
+ {.mfb
+ nop.m 0
+ // calculate remainder (double precision)
+ // set correct sign of result before returning
+ (p10) fma.d.s0 f8=f11,f12,f0
+ (p8) br.ret.sptk b0;;
+}
+ {.mfi
+ nop.m 0
+ // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // if f14 was RZ(Q), set remainder to f14
+ (p9) mov f6=f14
+ br.cond.sptk L(loop53);;
+}
+
+
+
+L(FMOD_X_NAN_INF): // Reached when the fclass test on x (mask 0xe3: NaN or infinity) succeeded
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0 // f10 = y * 1 + 0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0 // p11 = y compares equal to zero
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt L(FMOD_Y_ZERO);;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8,p9 = f8, 0x23 // p8 = x is +-inf, p9 = complement (x is NaN on this path)
+ nop.i 999;;
+}
+// Y NaN ?
+{.mfi
+ nop.m 999
+(p8) fclass.m p9,p8=f9,0xc3 // with x infinite: p9 = y is NaN, p8 = y is not NaN
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8 // inf/inf, used to produce the NaN result
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fma.d f8=f8,f1,f0 // round the result to double
+ nop.b 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) frcpa.s0 f8,p7=f8,f9 // a NaN is involved: let frcpa pick the result from x and y
+ br.ret.sptk b0 ;;
+}
+
+
+L(FMOD_Y_NAN_INF_ZERO): // Reached when the fclass test on y (mask 0xe7: NaN, infinity or zero) succeeded
+
+// Y INF
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.d f8=f8,f1,f0 // y infinite: return x (rounded to double)
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.d f8=f9,f1,f0 // y NaN: return y (rounded to double)
+(p9) br.ret.spnt b0 ;; // otherwise fall through to the y == 0 handling that follows
+}
+
+L(FMOD_Y_ZERO):
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+{.mfi
+ nop.m 0
+ // set Invalid
+ frcpa f12,p0=f0,f0
+ nop.i 0
+}
+// X NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f8, 0xc3 // p9 = x is NaN, p10 = x is not NaN
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff // for non-NaN x: p9 = x matches none of the classes in mask 0xff, p10 = it matches one
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa f11,p7=f8,f0 // result candidate derived from x
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa f11,p7 = f9,f9 // y is zero here, so this is 0/0
+(p0) mov GR_Parameter_TAG = 121 ;; // tag value passed on to __libm_error_support for fmod(x,0)
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8, f8 // FR_X (f10) = original x for the error handler
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.d f8=f11,f1,f0 // tentative result, rounded to double
+(p0) br.sptk __libm_error_region;;
+}
+
+.endp fmod
+ASM_SIZE_DIRECTIVE(fmod)
+ASM_SIZE_DIRECTIVE(__ieee754_fmod)
+
+.proc __libm_error_region // Shared error exit for fmod: spill X, Y and the tentative result as doubles, call __libm_error_support
+__libm_error_region: // Entered by branch with GR_Parameter_TAG, FR_X and FR_RESULT already set
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32 once the frame is created
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; post-increment leaves the pointer at new sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (new sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (new sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call (see 8/15/00 history note)
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller of fmod
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S
new file mode 100644
index 0000000..9ac03a9
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_fmodf.S
@@ -0,0 +1,553 @@
+.file "fmodf.s"
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//====================================================================
+// 2/02/00 Initial version
+// 3/02/00 New Algorithm
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//11/28/00 Set FR_Y to f9
+//
+// API
+//====================================================================
+// float fmodf(float,float);
+//
+// Overview of operation
+//====================================================================
+// fmod(a,b)=a-i*b,
+// where i is an integer such that, if b!=0,
+// |i|<=|a/b| and |a/b-i|<1
+
+// Algorithm
+//====================================================================
+// a). if |a|<|b|, return a
+// b). get quotient and reciprocal overestimates accurate to
+// 33 bits (q2,y2)
+// c). if the exponent difference (exponent(a)-exponent(b))
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
+// round quotient estimate to single precision (k=RN(q2)),
+// calculate partial remainder (a'=a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
+
+// Special cases
+//====================================================================
+// b=+/-0: return NaN, call libm_error_support
+// a=+/-Inf, a=NaN or b=NaN: return NaN
+
+// Registers used
+//====================================================================
+// Predicate registers: p6-p11
+// General registers: r2, r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15
+
+#include "libm_support.h"
+
+.section .text
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+
+.proc fmodf#
+.align 32
+.global fmodf#
+.align 32
+
+fmodf:
+#ifdef _LIBC
+.global __ieee754_fmodf
+.type __ieee754_fmodf,@function
+__ieee754_fmodf:
+#endif
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f6=|a|
+ fmerge.s f6=f0,f8
+ mov r2 = 0x0ffdd
+}
+ {.mfi
+ nop.m 0
+ // f7=|b|
+ fmerge.s f7=f0,f9
+ nop.i 0;;
+}
+
+{ .mfi
+ setf.exp f11 = r2
+ // (1) y0
+ frcpa.s1 f10,p6=f6,f7
+ nop.i 0
+}
+
+// eliminate special cases
+// Y +-NAN, +-inf, +-0? p7
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0xe7
+ nop.i 999;;
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999
+}
+
+// |x| < |y|? Return x p8
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p8,p0 = f6,f7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 0
+ // normalize y (if |x|<|y|)
+ (p8) fma.s0 f9=f9,f1,f0
+ nop.i 0;;
+}
+
+ { .mfi
+ mov r2=0x1001f
+ // (2) q0=a*y0
+ (p6) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 f12=f7,f10,f1
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x (if |x|<|y|)
+ (p8) fma.s.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.bbb
+ (p9) br.cond.spnt L(FMOD_X_NAN_INF)
+ (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
+ // if |x|<|y|, return
+ (p8) br.ret.spnt b0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x
+ fma.s0 f6=f6,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // normalize y
+ fma.s0 f7=f7,f1,f0
+ nop.i 0;;
+}
+
+
+ {.mfi
+ // f15=2^32
+ setf.exp f15=r2
+ // (4) q1=q0+e0*q0
+ (p6) fma.s1 f13=f12,f13,f13
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 f14=f12,f12,f11
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ movl r2=0x33a00000;;
+}
+{ .mfi
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 f10=f12,f10,f10
+ nop.i 0;;
+}
+{.mfi
+ // set f12=1.25*2^{-24}
+ setf.s f12=r2
+ // (7) q2=q1+e1*q1
+ (p6) fma.s1 f13=f13,f14,f13
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 f10=f14,f10,f10
+ // set p6=0, p10=0
+ cmp.ne.and p6,p10=r0,r0;;
+}
+
+.align 32
+L(loop24):
+ {.mfi
+ nop.m 0
+ // compare q2, 2^32
+ fcmp.lt.unc.s1 p8,p7=f13,f15
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 f11=f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // if exponent>=32, round quotient to single precision (perform in advance)
+ fma.s.s1 f13=f13,f1,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // set f12=sgn(a)
+ (p8) fmerge.s f12=f8,f1
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf f13=f11
+ nop.i 0;;
+}
+ { .mfi
+ nop.m 0
+ // calculate remainder (assuming f13=RZ(Q))
+ (p7) fnma.s1 f14=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // also if exponent>=32, round quotient to single precision
+ // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 f11=f13,f12,f13
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 f11=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // (p7) calculate remainder (assuming f11=RZ(Q))
+ (p7) fnma.s1 f6=f11,f7,f6
+ nop.i 0;;
+}
+
+
+ {.mfi
+ nop.m 0
+ // Final iteration (p8): is f11 the correct remainder (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 f13=f14,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // was f13=RZ(Q) ? (then new remainder f14>=0)
+ (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
+ nop.b 0;;
+}
+
+
+.pred.rel "mutex",p6,p10
+ {.mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
+ (p6) fma.s.s0 f8=f11,f12,f9
+ nop.b 0
+}
+ {.mfb
+ nop.m 0
+ // calculate remainder (single precision)
+ // set correct sign of result before returning
+ (p10) fma.s.s0 f8=f11,f12,f0
+ (p8) br.ret.sptk b0;;
+}
+ {.mfi
+ nop.m 0
+ // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // if f13 was RZ(Q), set remainder to f14
+ (p9) mov f6=f14
+ br.cond.sptk L(loop24);;
+}
+
+ { .mmb
+ nop.m 0
+ nop.m 0
+ br.ret.sptk b0;;
+ }
+
+L(FMOD_X_NAN_INF):
+
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt L(FMOD_Y_ZERO);;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ?
+{.mfi
+ nop.m 999
+(p8) fclass.m p9,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fma.s f8=f8,f1,f0
+ nop.b 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
+}
+
+
+L(FMOD_Y_NAN_INF_ZERO):
+
+// Y INF
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.s f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+L(FMOD_Y_ZERO):
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+{.mfi
+ nop.m 0
+ // set Invalid
+ frcpa f12,p0=f0,f0
+ nop.i 999
+}
+// X NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa f11,p7=f8,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa f11,p7 = f0,f0
+nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8, f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s f8=f11,f1,f0
+ nop.i 999;;
+}
+
+L(EXP_ERROR_RETURN):
+
+
+{ .mib
+ nop.m 0
+(p0) mov GR_Parameter_TAG=122
+(p0) br.sptk __libm_error_region;;
+}
+
+.endp fmodf
+ASM_SIZE_DIRECTIVE(fmodf)
+ASM_SIZE_DIRECTIVE(__ieee754_fmodf)
+
+.proc __libm_error_region // Error path for fmodf (tag 122 set by the branching code): spill x, y, result; call __libm_error_support
+__libm_error_region: // Frame after sp -= 64: sp+16 = parameter 1, sp+32 = parameter 2, sp+48 = result
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Address of the parameter 2 slot (old sp-32, i.e. new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate the 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store parameter 2 as single; pointer post-increments by 16 to new sp+48
+ add GR_Parameter_X = 16,sp // Address of the parameter 1 slot (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address = new sp+48
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) at new sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point parameter 2 back at its own slot (new sp+32)
+ br.call.sptk b0=__libm_error_support#;; // Call error handling function
+}
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address (see History note of 8/15/00)
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Release the 64-byte frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S
new file mode 100644
index 0000000..7fbfd43
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_fmodl.S
@@ -0,0 +1,577 @@
+.file "fmodl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//====================================================================
+// 2/02/00 Initial version
+// 3/02/00 New Algorithm
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//11/28/00 Set FR_Y to f9
+//
+// API
+//====================================================================
+// long double fmodl(long double,long double);
+//
+// Overview of operation
+//====================================================================
+// fmod(a,b)=a-i*b,
+// where i is an integer such that, if b!=0,
+// |i|<=|a/b| and |a/b-i|<1
+//
+// Algorithm
+//====================================================================
+// a). if |a|<|b|, return a
+// b). get quotient and reciprocal overestimates accurate to
+// 33 bits (q2,y2)
+// c). if the exponent difference (exponent(a)-exponent(b))
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
+// round quotient estimate to single precision (k=RN(q2)),
+// calculate partial remainder (a'=a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p11
+// General registers: r2,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15, f32-f33
+
+#include "libm_support.h"
+
+.section .text
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+
+.proc fmodl#
+.align 32
+.global fmodl#
+.align 32
+
+fmodl:
+#ifdef _LIBC
+.global __ieee754_fmodl
+.type __ieee754_fmodl,@function
+__ieee754_fmodl:
+#endif
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f6=|a|
+ fmerge.s f6=f0,f8
+ mov r2 = 0x0ffdd
+}
+ {.mfi
+ getf.sig r29=f9
+ // f7=|b|
+ fmerge.s f7=f0,f9
+ nop.i 0;;
+}
+
+{ .mfi
+ setf.exp f11 = r2
+ // (1) y0
+ frcpa.s1 f10,p6=f6,f7
+ nop.i 0;;
+}
+
+// eliminate special cases
+{.mmi
+nop.m 0
+nop.m 0
+// y pseudo-zero ?
+cmp.eq p7,p10=r29,r0;;
+}
+
+// Y +-NAN, +-inf, +-0? p7
+{ .mfi
+ nop.m 999
+(p10) fclass.m p7,p10 = f9, 0xe7
+ nop.i 999;;
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p11 = f8, 0xe3
+ nop.i 999
+}
+
+// |x| < |y|? Return x p8
+{ .mfi
+ nop.m 999
+(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7
+ nop.i 999 ;;
+}
+
+ { .mfi
+ mov r2=0x1001f
+ // (2) q0=a*y0
+ (p6) fma.s1 f13=f6,f10,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 f12=f7,f10,f1
+ nop.i 0;;
+}
+
+// Y +-NAN, +-inf, +-0? p7
+{ .mfi
+ nop.m 999
+ // pseudo-NaN ?
+(p10) fclass.nm p7,p0 = f9, 0xff
+ nop.i 999
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+(p11) fclass.nm p9,p0 = f8, 0xff
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 0
+ // y denormal ? set D flag (if |x|<|y|)
+ (p8) fnma.s0 f10=f9,f1,f9
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // normalize x (if |x|<|y|)
+ (p8) fma.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.bbb
+ (p9) br.cond.spnt L(FMOD_X_NAN_INF)
+ (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
+ // if |x|<|y|, return
+ (p8) br.ret.spnt b0;;
+}
+
+ {.mfi
+ nop.m 0
+ // x denormal ? set D flag
+ fnma.s0 f32=f6,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // y denormal ? set D flag
+ fnma.s0 f33=f7,f1,f7
+ nop.i 0;;
+}
+
+ {.mfi
+ // f15=2^32
+ setf.exp f15=r2
+ // (4) q1=q0+e0*q0
+ (p6) fma.s1 f13=f12,f13,f13
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 f14=f12,f12,f11
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ movl r2=0x33a00000;;
+}
+{ .mfi
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 f10=f12,f10,f10
+ nop.i 0;;
+}
+{.mfi
+ // set f12=1.25*2^{-24}
+ setf.s f12=r2
+ // (7) q2=q1+e1*q1
+ (p6) fma.s1 f13=f13,f14,f13
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 f10=f14,f10,f10
+ // set p6=0, p10=0
+ cmp.ne.and p6,p10=r0,r0;;
+}
+
+
+.align 32
+L(loop64):
+ {.mfi
+ nop.m 0
+ // compare q2, 2^32
+ fcmp.lt.unc.s1 p8,p7=f13,f15
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 f11=f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // if exponent>=32, round quotient to single precision (perform in advance)
+ fma.s.s1 f13=f13,f1,f0
+ nop.i 0;;
+}
+
+
+ {.mfi
+ nop.m 0
+ // set f12=sgn(a)
+ (p8) fmerge.s f12=f8,f1
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf f13=f11
+ nop.i 0;;
+}
+ { .mfi
+ nop.m 0
+ // calculate remainder (assuming f13=RZ(Q))
+ (p7) fnma.s1 f14=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // also if exponent>=32, round quotient to single precision
+ // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 f11=f13,f12,f13
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 f11=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // (p7) calculate remainder (assuming f11=RZ(Q))
+ (p7) fnma.s1 f6=f11,f7,f6
+ nop.i 0;;
+}
+
+
+ {.mfi
+ nop.m 0
+ // Final iteration (p8): is f11 the correct remainder (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 f13=f14,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // was f13=RZ(Q) ? (then new remainder f14>=0)
+ (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
+ nop.b 0;;
+}
+
+
+.pred.rel "mutex",p6,p10
+ {.mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
+ (p6) fma.s0 f8=f11,f12,f9
+ nop.b 0
+}
+ {.mfb
+ nop.m 0
+ // set correct sign of result before returning: f12=sgn(a)
+ (p10) fma.s0 f8=f11,f12,f0
+ (p8) br.ret.sptk b0;;
+}
+ {.mfi
+ nop.m 0
+ // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // if f13 was RZ(Q), set remainder to f14
+ (p9) mov f6=f14
+ br.cond.sptk L(loop64);;
+}
+
+
+
+L(FMOD_X_NAN_INF):
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt L(FMOD_Y_ZERO);;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ // set p7 to 0
+ cmp.ne p7,p0=r0,r0
+(p0) fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ?
+{.mfi
+ nop.m 999
+(p8) fclass.m p9,p8=f9,0xc3
+ nop.i 0;;
+}
+// Y not pseudo-zero ? (r29 holds significand)
+{.mii
+ nop.m 999
+(p8) cmp.ne p7,p0=r29,r0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p7) fnma.s0 f9=f9,f1,f9
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fma.s0 f8=f8,f1,f0
+ nop.b 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
+}
+
+
+L(FMOD_Y_NAN_INF_ZERO):
+// Y INF
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f9, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p0 = f9, 0xff
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+L(FMOD_Y_ZERO):
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+{.mfi
+ nop.m 0
+ // set Invalid
+ frcpa f12,p0=f0,f0
+ nop.i 0
+}
+// X NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa f11,p7=f8,f0
+ nop.i 0;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p10) frcpa f11,p7 = f9,f9
+(p0) mov GR_Parameter_TAG = 120 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8, f8
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma f8=f11,f1,f0
+(p0) br.sptk __libm_error_region;;
+}
+
+.endp fmodl
+ASM_SIZE_DIRECTIVE(fmodl)
+ASM_SIZE_DIRECTIVE(__ieee754_fmodl)
+
+
+.proc __libm_error_region // Error path for fmodl (tag 120 set by the branching code): spill x, y, result; call __libm_error_support
+__libm_error_region: // Frame after sp -= 64: sp+16 = parameter 1, sp+32 = parameter 2, sp+48 = result
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Address of the parameter 2 slot (old sp-32, i.e. new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate the 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store parameter 2 as extended; pointer post-increments by 16 to new sp+48
+ add GR_Parameter_X = 16,sp // Address of the parameter 1 slot (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address = new sp+48
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) at new sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point parameter 2 back at its own slot (new sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address (see History note of 8/15/00)
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Release the 64-byte frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S
new file mode 100644
index 0000000..2fc9633
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_hypot.S
@@ -0,0 +1,438 @@
+.file "hypot.asm"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+// 2/02/00 hand-optimized
+// 4/04/00 Unwind support added
+// 6/20/00 new version
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: hypot(x,y) = sqrt(x^2 + y^2) for double precision values
+// x and y
+// Also provides cabs functionality.
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9 (Input)
+// f6 -f15, f32-f34
+//
+// General Purpose Registers:
+// r2,r3,r29 (Scratch)
+// r32-r35 (Locals)
+// r36-r39 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6 - p10
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// Overflow can occur.
+// hypot(Infinity and anything) = +Infinity
+// hypot(QNaN and anything) = QNaN
+// hypot(SNaN and anything ) = QNaN
+//
+// *********************************************************************
+//
+// Implementation:
+// x2 = x * x in double-extended
+// y2 = y * y in double-extended
+// temp = x2 + y2 in double-extended
+// sqrt(temp) rounded to double
+//
+// *********************************************************************
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r33
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_TAG = r39
+
+FR_X = f32
+FR_Y = f33
+FR_RESULT = f8
+
+.section .text
+#ifndef _LIBC
+.proc cabs#
+.global cabs#
+cabs:
+.endp cabs
+#endif
+.proc hypot#
+.global hypot#
+.align 64
+
+hypot:
+#ifdef _LIBC
+.global __hypot
+__hypot:
+.global __ieee754_hypot
+__ieee754_hypot:
+#endif
+{.mfi
+ alloc r32= ar.pfs,0,4,4,0
+ // Compute x*x
+ fma.s1 f10=f8,f8,f0
+ // r2=bias-1
+ mov r2=0xfffe
+}
+{.mfi
+ // 63/8
+ mov r3=0x40fc //0000
+ // y*y
+ fma.s1 f11=f9,f9,f0
+ // r29=429/16
+ mov r29=0x41d68;; //000
+}
+
+{ .mfi
+ nop.m 0
+// Check if x is an Inf - if so return Inf even
+// if y is a NaN (C9X)
+ fclass.m.unc p7, p6 = f8, 0x023
+ shl r3=r3,16
+}
+{.mfi
+ nop.m 0
+ // if possible overflow, copy f8 to f32
+ // set Denormal, if necessary
+ // (p8)
+ fma.d.s0 f32=f8,f1,f0
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+// Check if y is an Inf - if so return Inf even
+// if x is a NaN (C9X)
+ fclass.m.unc p8, p9 = f9, 0x023
+ shl r29=r29,12
+}
+{ .mfb
+ // f7=0.5
+ setf.exp f7=r2
+// For x=inf, multiply y by 1 to raise invalid on y an SNaN
+// (p7) fma.s0 f9=f9,f1,f0
+ // copy f9 to f33; set Denormal, if necessary
+ fma.d.s0 f33=f9,f1,f0
+ nop.b 0;;
+}
+{.mfb
+ // f13=63/8
+ setf.s f13=r3
+ // is y Zero ?
+ (p6) fclass.m p6,p0=f9,0x7
+ nop.b 0
+}
+{.mlx
+ nop.m 0
+ movl r2=0x408c0000;;
+}
+
+{.mfi
+ // f34=429/16
+ setf.s f34=r29
+ // is x Zero ?
+ (p9) fclass.m p9,p0=f8,0x7
+ // 231/16
+ mov r3=0x4167;; //0000
+}
+{.mfi
+ nop.m 0
+ // a=x2+y2
+ fma.s1 f12=f10,f1,f11
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // y not NaN ?
+ (p9) fclass.m p8,p0=f9,0x3f
+ shl r3=r3,16
+}
+{.mfi
+ nop.m 0
+ // f6=2
+ fma.s1 f6=f1,f1,f1
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // x not NaN ?
+ (p6) fclass.m p7,p0=f8,0x3f
+ nop.i 0;;
+}
+{.mfi
+ // f9=35/8
+ setf.s f9=r2
+ nop.f 0
+ // 2*emax-2
+ mov r2=0x107fb;;
+}
+
+{.mfb
+ nop.m 0
+ // if f8=Infinity or f9=Zero, return |f8|
+ (p7) fmerge.s f8=f0,f32
+ (p7) br.ret.spnt b0
+}
+{.mfb
+ nop.m 0
+ // if f9=Infinity or f8=Zero, return |f9|
+ (p8) fmerge.s f8=f0,f33
+ (p8) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ // f10 =231/16
+ setf.s f10=r3
+ // z0=frsqrta(a)
+ frsqrta.s1 f8,p6=f12
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Identify Natvals, Infs, NaNs, and Zeros
+// and return result
+ fclass.m.unc p7, p0 = f12, 0x1E7
+ nop.i 0;;
+}
+{.mfb
+ // get exponent of x^2+y^2
+ getf.exp r3=f12
+ // if special case, set f8
+ (p7) mov f8=f12
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // S0=a*z0
+ (p6) fma.s1 f14=f12,f8,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H0=0.5*z0
+ (p6) fma.s1 f15=f8,f7,f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // f6=5/2
+ fma.s1 f6=f7,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f11=3/2
+ fma.s1 f11=f7,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // d=0.5-S0*H0
+ (p6) fnma.s1 f7=f14,f15,f7
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P67=231/16+429/16*d
+ (p6) fma.s1 f10=f34,f7,f10
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P45=63/8*d+35/8
+ (p6) fma.s1 f9=f13,f7,f9
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P23=5/2*d+3/2
+ (p6) fma.s1 f11=f6,f7,f11
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // d2=d*d
+ (p6) fma.s1 f13=f7,f7,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P47=d2*P67+P45
+ (p6) fma.s1 f10=f10,f13,f9
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P13=d*P23+1
+ (p6) fma.s1 f11=f11,f7,f1
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // d3=d2*d
+ (p6) fma.s1 f13=f13,f7,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // T0=d*S0
+ (p6) fma.s1 f15=f7,f14,f0
+ nop.i 0
+}
+{.mfi
+ // Is x^2 + y^2 well less than the overflow
+ // threshold?
+ (p6) cmp.lt.unc p7, p8 = r3,r2
+ // P=P13+d3*P47
+ (p6) fma.s1 f10=f13,f10,f11
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // S=P*T0+S0
+ fma.d.s0 f8=f10,f15,f14
+ // No overflow in this case
+ (p7) br.ret.sptk b0;;
+}
+
+{ .mfi
+ nop.m 0
+(p8) fsetc.s2 0x7F,0x42
+ // Possible overflow path, must detect by
+ // Setting widest range exponent with prevailing
+ // rounding mode.
+ nop.i 0 ;;
+}
+
+
+{ .mfi
+ // bias+0x400 (bias+EMAX+1)
+ (p8) mov r2=0x103ff
+ // S=P*T0+S0
+ (p8) fma.d.s2 f12=f10,f15,f14
+ nop.i 0 ;;
+}
+{ .mfi
+(p8) setf.exp f11 = r2
+(p8) fsetc.s2 0x7F,0x40
+// Restore Original Mode in S2
+ nop.i 0 ;;
+}
+{ .mfi
+ nop.m 0
+(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
+ nop.i 0 ;;
+}
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG = 46
+ // No overflow
+(p9) br.ret.sptk b0;;
+}
+.endp hypot
+ASM_SIZE_DIRECTIVE(hypot)
+
+.proc __libm_error_region // Error path for hypot (tag 46), entered by fall-through from the overflow check above
+__libm_error_region: // Frame after sp -= 64: sp+16 = parameter 1, sp+32 = parameter 2, sp+48 = result
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Address of the parameter 2 slot (old sp-32, i.e. new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate the 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Store parameter 2; pointer post-increments by 16 to new sp+48
+ add GR_Parameter_X = 16,sp // Address of the parameter 1 slot (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address = new sp+48
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) at new sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point parameter 2 back at its own slot (new sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address (see History note of 8/15/00)
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Release the 64-byte frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S
new file mode 100644
index 0000000..18a5e32
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_hypotf.S
@@ -0,0 +1,394 @@
+.file "hypotf.asm"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+// 2/02/00 hand-optimized
+// 4/04/00 Unwind support added
+// 6/26/00 new version
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: hypotf(x,y) = sqrt(x^2 + y^2) for single precision values
+// x and y
+// Also provides cabsf functionality.
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9 (Input)
+// f6 -f15
+//
+// General Purpose Registers:
+// r2-r3 (Scratch)
+// r32-r35 (Locals)
+// r36-r39 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6 - p10
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// Overflow can occur.
+// hypotf(Infinity and anything) = +Infinity
+// hypotf(QNaN and anything) = QNaN
+// hypotf(SNaN and anything ) = QNaN
+//
+// *********************************************************************
+//
+// Implementation:
+// x2 = x * x in double-extended
+// y2 = y * y in double-extended
+// temp = x2 + y2 in double-extended
+// sqrt(temp) rounded to single precision
+//
+// *********************************************************************
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r33 // holds ar.pfs across the error-handler call
+GR_SAVE_B0 = r34 // holds b0 (return address) across the error-handler call
+GR_SAVE_GP = r35 // holds gp across the error-handler call
+GR_Parameter_X = r36 // address of the stored x argument (first output register)
+GR_Parameter_Y = r37 // address of the stored y argument
+GR_Parameter_RESULT = r38 // address of the stored result
+GR_Parameter_TAG = r39 // error tag (47 for hypotf)
+
+FR_X = f14 // copy of input x (f8) made at entry
+FR_Y = f15 // copy of input y (f9) made at entry
+FR_RESULT = f8 // computed result; also the return register
+
+.section .text
+#ifndef _LIBC
+.proc cabsf#
+.global cabsf#
+cabsf: // cabsf label is emitted only outside glibc; it has no instructions of its own
+.endp cabsf
+#endif
+.proc hypotf#
+.global hypotf#
+.align 64
+
+hypotf:
+#ifdef _LIBC
+.global __hypotf
+__hypotf:
+.global __ieee754_hypotf
+__ieee754_hypotf:
+#endif
+{.mfi
+ alloc r32= ar.pfs,0,4,4,0 // 0 inputs, 4 locals (r32-r35), 4 outputs (r36-r39)
+ // Compute x*x
+ fma.s1 f10=f8,f8,f0
+ // r2=bias-1
+ mov r2=0xfffe
+}
+{.mfi
+ nop.m 0
+ // y*y
+ fma.s1 f11=f9,f9,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Check if x is an Inf - if so return Inf even
+// if y is a NaN (C9X)
+ fclass.m.unc p7, p6 = f8, 0x023
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // copy f8 to f14 (FR_X) for the error path;
+ // set Denormal, if necessary
+ // (this copy has no qualifying predicate)
+ fma.s.s0 f14=f8,f1,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Check if y is an Inf - if so return Inf even
+// if x is a NaN (C9X)
+ fclass.m.unc p8, p9 = f9, 0x023
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+// For x=inf, multiply y by 1 to raise invalid on y an SNaN
+// (p7) fma.s0 f9=f9,f1,f0
+ // copy f9 to f15 (FR_Y); set Denormal, if necessary
+ fma.s.s0 f15=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is y Zero ? (tested only when x is not an Inf)
+ (p6) fclass.m p6,p0=f9,0x7
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is x Zero ? (tested only when y is not an Inf)
+ (p9) fclass.m p9,p0=f8,0x7
+ nop.i 0;;
+}
+
+{.mfi
+ // f7=0.5
+ setf.exp f7=r2
+ // a=x2+y2
+ fma.s1 f12=f10,f1,f11
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // x not NaN ? (only when y is Zero)
+ (p6) fclass.m p7,p0=f8,0x3f
+ nop.i 0
+}
+{.mfi
+ // bias+2*emax-2 (emax=127 for single precision)
+ mov r2=0x100fb
+ // f6=2
+ fma.s1 f6=f1,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // y not NaN ? (only when x is Zero)
+ (p9) fclass.m p8,p0=f9,0x3f
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // if f8=Infinity or f9=Zero, return |f8|
+ (p7) fmerge.s f8=f0,f14
+ (p7) br.ret.spnt b0
+}
+{.mfb
+ nop.m 0
+ // if f9=Infinity or f8=Zero, return |f9|
+ (p8) fmerge.s f8=f0,f15
+ (p8) br.ret.spnt b0;;
+}
+
+{ .mfi
+ nop.m 0
+// Identify Natvals, Infs, NaNs, and Zeros
+// and return result
+ fclass.m.unc p7, p0 = f12, 0x1E7
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // z0=frsqrta(a)
+ frsqrta.s1 f8,p6=f12
+ nop.i 0;;
+}
+
+{.mfb
+ // get exponent of x^2+y^2
+ getf.exp r3=f12
+ // if special case, set f8
+ (p7) mov f8=f12
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // S0=a*z0
+ (p6) fma.s1 f12=f12,f8,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H0=0.5*z0
+ (p6) fma.s1 f10=f8,f7,f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // f6=5/2
+ fma.s1 f6=f7,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f11=3/2
+ fma.s1 f11=f7,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // d=0.5-S0*H0
+ (p6) fnma.s1 f7=f12,f10,f7
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P01=d+1
+ (p6) fma.s1 f10=f1,f7,f1
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P23=5/2*d+3/2
+ (p6) fma.s1 f11=f6,f7,f11
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // d2=d*d
+ (p6) fma.s1 f7=f7,f7,f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // Is x^2 + y^2 well less than the overflow
+ // threshold?
+ (p6) cmp.lt.unc p7, p8 = r3,r2
+ // P=P01+d2*P23
+ (p6) fma.s1 f10=f7,f11,f10
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // S=P*S0
+ fma.s.s0 f8=f10,f12,f0
+ // No overflow in this case
+ (p7) br.ret.sptk b0;;
+}
+
+{ .mfi
+ nop.m 0
+(p8) fsetc.s2 0x7F,0x42
+ // Possible overflow path, must detect by
+ // Setting widest range exponent with prevailing
+ // rounding mode.
+ nop.i 0 ;;
+}
+
+
+{ .mfi
+ // bias+0x80 (bias+EMAX+1, EMAX=127 for single precision)
+ (p8) mov r2=0x1007f
+ // S=P*S0
+ (p8) fma.s.s2 f12=f10,f12,f0
+ nop.i 0 ;;
+}
+{ .mfi
+(p8) setf.exp f11 = r2
+(p8) fsetc.s2 0x7F,0x40
+// Restore Original Mode in S2
+ nop.i 0 ;;
+}
+{ .mfi
+ nop.m 0
+(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
+ nop.i 0 ;;
+}
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG = 47 // error tag used if execution falls into the error region below
+ // No overflow
+(p9) br.ret.sptk b0;;
+}
+.endp hypotf
+ASM_SIZE_DIRECTIVE(hypotf)
+
+.proc __libm_error_region
+__libm_error_region: // Entered by fall-through when the (p9) return above is not taken
+.prologue
+{ .mii
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp-32, i.e. new sp+32
+(p0) mov GR_Parameter_TAG = 47 // Error tag; same value was already set before the fall-through
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at new sp+32; pointer post-increments to new sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (new sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (pointer still at new sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Move Parameter 2 address back to new sp+32
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S
new file mode 100644
index 0000000..54ca849
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_hypotl.S
@@ -0,0 +1,478 @@
+.file "hypotl.asm"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+// 2/02/00 hand-optimized
+// 4/04/00 Unwind support added
+// 6/20/00 new version
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: hypotl(x,y) = sqrt(x^2 + y^2) for double extended values
+// x and y
+// Also provides cabsl functionality.
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9 (Input)
+// f6 -f15, f32-f34
+//
+// General Purpose Registers:
+// r2-r3 (Scratch)
+// r32-r35 (Locals)
+// r36-r39 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6 - p10
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// Overflow can occur.
+// hypotl(Infinity and anything) = +Infinity
+// hypotl(QNaN and anything) = QNaN
+// hypotl(SNaN and anything ) = QNaN
+//
+// *********************************************************************
+//
+// Implementation:
+// x2 = x * x in double-extended
+// y2 = y * y in double-extended
+// temp = x2 + y2 in double-extended
+// sqrt(temp) rounded to double extended
+//
+// *********************************************************************
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r33 // holds ar.pfs across the error-handler call
+GR_SAVE_B0 = r34 // holds b0 (return address) across the error-handler call
+GR_SAVE_GP = r35 // holds gp across the error-handler call
+GR_Parameter_X = r36 // address of the stored x argument (first output register)
+GR_Parameter_Y = r37 // address of the stored y argument
+GR_Parameter_RESULT = r38 // address of the stored result
+GR_Parameter_TAG = r39 // error tag (45 for hypotl)
+
+FR_X = f32 // copy of input x (f8) made at entry
+FR_Y = f33 // copy of input y (f9) made at entry
+FR_RESULT = f8 // computed result; also the return register
+
+.section .text
+#ifndef _LIBC
+.proc cabsl#
+.global cabsl#
+cabsl: // cabsl label is emitted only outside glibc; it has no instructions of its own
+.endp cabsl
+#endif
+.proc hypotl#
+.global hypotl#
+.align 64
+
+hypotl:
+#ifdef _LIBC
+.global __hypotl
+__hypotl:
+.global __ieee754_hypotl
+__ieee754_hypotl:
+#endif
+{.mfi
+ alloc r32= ar.pfs,0,4,4,0 // 0 inputs, 4 locals (r32-r35), 4 outputs (r36-r39)
+ // Compute x*x
+ fma.s1 f10=f8,f8,f0
+ // r2=bias-1
+ mov r2=0xfffe
+}
+{.mfi
+ nop.m 0
+ // y*y
+ fma.s1 f11=f9,f9,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Check if x is an Inf - if so return Inf even
+// if y is a NaN (C9X)
+ fclass.m.unc p7, p6 = f8, 0x023
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // copy f8 to f32 (FR_X) for the error path;
+ // set Denormal, if necessary
+ // (this copy has no qualifying predicate)
+ fma.s0 f32=f8,f1,f0
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+// Check if y is an Inf - if so return Inf even
+// if x is a NaN (C9X)
+ fclass.m.unc p8, p9 = f9, 0x023
+ nop.i 0
+}
+{ .mfi
+ nop.m 999 // NOTE(review): immediate differs from the other nops; presumably has no effect - confirm
+// For x=inf, multiply y by 1 to raise invalid on y an SNaN
+// (p7) fma.s0 f9=f9,f1,f0
+ // copy f9 to f33 (FR_Y); set Denormal, if necessary
+ fma.s0 f33=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is y Zero ? (tested only when x is not an Inf)
+ (p6) fclass.m p6,p0=f9,0x7
+ nop.i 0;;
+}
+
+{.mfi
+ // f7=0.5
+ setf.exp f7=r2
+ // a=x2+y2
+ fma.s1 f12=f10,f1,f11
+ nop.i 0
+}
+{.mfi
+ mov r2=0x408c // upper half of 0x408c0000 (single precision 35/8); shifted into place below
+ // dx=x*x-x2
+ fms.s1 f13=f8,f8,f10
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is x Zero ? (tested only when y is not an Inf)
+ (p9) fclass.m p9,p0=f8,0x7
+ shl r2=r2,16 // r2=0x408c0000
+}
+{.mfi
+ nop.m 0
+ // dy=y*y-y2
+ fms.s1 f14=f9,f9,f11
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // x not NaN ? (only when y is Zero)
+ (p6) fclass.m p7,p0=f8,0x3f
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f6=2
+ fma.s1 f6=f1,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // f34=min(x2,y2)
+ famin.s1 f34=f10,f11
+ nop.i 0
+}
+{.mfb
+ nop.m 0
+ // f10=max(x2,y2)
+ famax.s1 f10=f11,f10
+ nop.b 0;; //
+}
+
+{.mfi
+ nop.m 0
+ // y not NaN ? (only when x is Zero)
+ (p9) fclass.m p8,p0=f9,0x3f
+ nop.i 0;;
+}
+{.mfb
+ // f9=35/8 (input y is no longer needed in f9; its copy is in f33)
+ setf.s f9=r2
+ // if f8=Infinity or f9=Zero, return |f8|
+ (p7) fmerge.s f8=f0,f32
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z0=frsqrta(a)
+ frsqrta.s1 f8,p6=f12
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+// Identify Natvals, Infs, NaNs, and Zeros
+// and return result
+ fclass.m.unc p7, p0 = f12, 0x1E7
+ nop.i 0
+}
+{.mfi
+ // get exponent of x^2+y^2
+ getf.exp r3=f12
+ // dxy=dx+dy
+ fma.s1 f13=f13,f1,f14
+ nop.i 0;;
+}
+
+{.mfb
+ // bias+2*emax-2 (emax=16383 for double extended)
+ mov r2=0x17ffb
+ // if f9=Infinity or f8=Zero, return |f9|
+ (p8) fmerge.s f8=f0,f33
+ (p8) br.ret.spnt b0
+}
+{.mfi
+ nop.m 0
+ // dd=a-max(x2,y2)
+ fnma.s1 f10=f10,f1,f12
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // S0=a*z0
+ (p6) fma.s1 f14=f12,f8,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H0=0.5*z0
+ (p6) fma.s1 f15=f8,f7,f0
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // if special case, set f8
+ (p7) mov f8=f12
+ (p7) br.ret.spnt b0
+}
+{.mfi
+ nop.m 0
+ // da=min(x2,y2)-dd
+ fnma.s1 f10=f10,f1,f34
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // f6=5/2
+ fma.s1 f6=f7,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f11=3/2
+ fma.s1 f11=f7,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // d=0.5-S0*H0
+ (p6) fnma.s1 f7=f14,f15,f7
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P1=3/2*d+1
+ (p6) fma.s1 f11=f11,f7,f1
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P2=35/8*d+5/2
+ (p6) fma.s1 f9=f9,f7,f6
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // d2=d*d
+ (p6) fma.s1 f34=f7,f7,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // T0=d*S0
+ (p6) fma.s1 f6=f7,f14,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // G0=d*H0
+ (p6) fma.s1 f7=f7,f15,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P=d2*P2+P1
+ (p6) fma.s1 f11=f34,f9,f11
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // S1=p*T0+S0
+ (p6) fma.s1 f14=f11,f6,f14
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H1=p*G0+H0
+ (p6) fma.s1 f15=f11,f7,f15
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // e1=a-S1*S1
+ (p6) fnma.s1 f7=f14,f14,f12
+ nop.i 0
+}
+{.mfi
+ // Is x^2 + y^2 well less than the overflow
+ // threshold?
+ (p6) cmp.lt.unc p7, p8 = r3,r2
+ // c=dxy+da
+ (p6) fma.s1 f13=f13,f1,f10
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // e=e1+c
+ (p6) fma.s1 f13=f7,f1,f13
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // S=e*H1+S1
+ fma.s0 f8=f13,f15,f14
+ // No overflow in this case
+ (p7) br.ret.sptk b0;;
+}
+
+{ .mfi
+ nop.m 0
+(p8) fsetc.s2 0x7F,0x42
+ // Possible overflow path, must detect by
+ // Setting widest range exponent with prevailing
+ // rounding mode.
+ nop.i 0 ;;
+}
+
+
+{ .mfi
+ // bias+0x4000 (bias+EMAX+1, EMAX=16383 for double extended)
+ (p8) mov r2=0x13fff
+ // S=e*H1+S1
+ (p8) fma.s2 f12=f13,f15,f14
+ nop.i 0 ;;
+}
+{ .mfi
+(p8) setf.exp f11 = r2
+(p8) fsetc.s2 0x7F,0x40
+// Restore Original Mode in S2
+ nop.i 0 ;;
+}
+{ .mfi
+ nop.m 0
+(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
+ nop.i 0 ;;
+}
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG = 45; // error tag used if execution falls into the error region below
+ // No overflow
+(p9) br.ret.sptk b0;;
+}
+.endp hypotl
+ASM_SIZE_DIRECTIVE(hypotl)
+
+.proc __libm_error_region
+__libm_error_region: // Entered by fall-through when the (p9) return above is not taken
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp-32, i.e. new sp+32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 at new sp+32; pointer post-increments to new sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (new sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (pointer still at new sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Move Parameter 2 address back to new sp+32
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S
new file mode 100644
index 0000000..09e305d
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log.S
@@ -0,0 +1,1091 @@
+.file "log.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 6/16/00 Updated table to be rounded correctly
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 8/17/00 Improved speed of main path by 5 cycles
+// Shortened path for x=1.0
+// 1/09/01 Improved speed, fixed flags for neg denormals
+//
+//
+// API
+//==============================================================
+// double log(double)
+// double log10(double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Consider x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(frcpa(x) x/frcpa(x))
+// = log(1/frcpa(x)) + log(frcpa(x) x)
+// = -log(frcpa(x)) + log(frcpa(x) x)
+//
+// frcpa(x) = 2^-N frcpa(1.f1 f2 ... f63)
+//
+// -log(frcpa(x)) = -log(C)
+// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
+//
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+
+// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 + T + log(frcpa(x) x)
+//
+// Log(x) = +Nlog2 + T + log(C x)
+//
+// Cx = 1 + r
+//
+// Log(x) = +Nlog2 + T + log(1+r)
+// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
+//
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
+//
+// Implementation
+//===============
+// CASE 1: |x-1| >= 2^-6
+// C = frcpa(x)
+// r = C * x - 1
+//
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
+//
+// x = f * 2^n where f is 1.f_1f_2f_3....f_63
+// Nfloat = float(n) where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 16
+// get the dxt table entry at index + offset = T
+//
+// result = (T + Nfloat * log(2)) + rseries
+//
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+// y_k = frcpa(x_k)
+// log(1/y_k) in quad and round to double-extended
+
+// CASE 2: |x-1| < 2^-6
+// w = x - 1
+//
+// Form wseries = w + Q1*w^2 + Q2*w^3 + ... + Q7*w^8 + Q8*w^9
+//
+// result = wseries
+
+// Special values
+//==============================================================
+
+
+// log(+0) = -inf
+// log(-0) = -inf
+
+// log(+qnan) = +qnan
+// log(-qnan) = -qnan
+// log(+snan) = +qnan
+// log(-snan) = -qnan
+
+// log(-n) = QNAN Indefinite
+// log(-inf) = QNAN Indefinite
+
+// log(+inf) = +inf
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f9 -> f15, f32 -> f68
+
+// General registers used:
+// r32 -> r51
+
+// Predicate registers used:
+// p6 -> p15
+
+// p8 log base e
+// p6 log base e special
+// p9 used in the frcpa
+// p13 log base e large W
+// p14 log base e small w
+
+// p7 log base 10
+// p10 log base 10 large W
+// p11 log base 10 small w
+// p12 log base 10 special
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+log_int_Nfloat = f9
+log_Nfloat = f10
+
+log_P5 = f11
+log_P4 = f12
+log_P3 = f13
+log_P2 = f14
+log_half = f15
+
+log_log2 = f32
+log_T = f33
+
+log_rp_p4 = f34
+log_rp_p32 = f35
+log_rp_p2 = f36
+log_w6 = f37
+log_rp_p10 = f38
+log_rcube = f39
+log_rsq = f40
+
+log_T_plus_Nlog2 = f41
+log_w3 = f42
+
+log_r = f43
+log_C = f44
+
+log_w = f45
+log_Q8 = f46
+log_Q7 = f47
+log_Q4 = f48
+log_Q3 = f49
+log_Q6 = f50
+log_Q5 = f51
+log_Q2 = f52
+log_Q1 = f53 // shares f53 with log_P1
+log_P1 = f53 // same register as log_Q1; one table entry serves as both Q1 and P1 (-0.5)
+
+log_rp_q7 = f54
+log_rp_q65 = f55
+log_Qlo = f56
+
+log_rp_q3 = f57
+log_rp_q21 = f58
+log_Qhi = f59
+
+log_wsq = f60
+log_w4 = f61
+log_Q = f62
+
+log_inv_ln10 = f63
+log_log10_hi = f64
+log_log10_lo = f65
+log_rp_q10 = f66
+log_NORM_f8 = f67
+log_r2P_r = f68
+
+// ===================================
+
+log_GR_exp_17_ones = r33
+log_GR_exp_16_ones = r34
+log_GR_exp_f8 = r35
+log_GR_signexp_f8 = r36
+log_GR_true_exp_f8 = r37
+log_GR_significand_f8 = r38
+log_GR_half_exp = r39 // shares r39 with log_GR_index
+log_GR_index = r39 // same register as log_GR_half_exp
+log_AD_1 = r40
+log_GR_signexp_w = r41
+log_GR_fff9 = r42
+log_AD_2 = r43
+log_GR_exp_w = r44
+
+GR_SAVE_B0 = r45
+GR_SAVE_GP = r46
+GR_SAVE_PFS = r47
+
+GR_Parameter_X = r48
+GR_Parameter_Y = r49
+GR_Parameter_RESULT = r50
+log_GR_tag = r51
+
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+log_table_1: // polynomial coefficients, stored as 8-byte IEEE double bit patterns
+ASM_TYPE_DIRECTIVE(log_table_1,@object)
+data8 0xBFC5555DA7212371 // P5, multiplies r^6 in rseries
+data8 0x3FC999A19EEF5826 // P4, multiplies r^5 in rseries
+data8 0x3FBC756AC654273B // Q8, multiplies w^9 in wseries
+data8 0xBFC001A42489AB4D // Q7, multiplies w^8 in wseries
+data8 0x3FC99999999A169B // Q4, multiplies w^5 in wseries
+data8 0xBFD00000000019AC // Q3, multiplies w^4 in wseries
+ASM_SIZE_DIRECTIVE(log_table_1)
+log_table_2:
+ASM_TYPE_DIRECTIVE(log_table_2,@object)
+data8 0xBFCFFFFFFFFEF009 // P3
+data8 0x3FD555555554ECB2 // P2
+data8 0x3FC2492479AA0DF8 // Q6
+data8 0xBFC5555544986F52 // Q5
+data8 0x3FD5555555555555 // Q2
+data8 0xBFE0000000000000 // Q1, P1 = -0.5
+
+
+data8 0xde5bd8a937287195, 0x00003ffd // double-extended 1/ln(10)
+data8 0xb17217f7d1cf79ac, 0x00003ffe // log2
+// b17217f7d1cf79ab c9e3b39803f2f6a
+
+
+data8 0x80200aaeac44ef38 , 0x00003ff6 // log(1/frcpa(1+ 0/2^-8))
+
+data8 0xc09090a2c35aa070 , 0x00003ff7 // log(1/frcpa(1+ 1/2^-8))
+data8 0xa0c94fcb41977c75 , 0x00003ff8 // log(1/frcpa(1+ 2/2^-8))
+data8 0xe18b9c263af83301 , 0x00003ff8 // log(1/frcpa(1+ 3/2^-8))
+data8 0x8d35c8d6399c30ea , 0x00003ff9 // log(1/frcpa(1+ 4/2^-8))
+data8 0xadd4d2ecd601cbb8 , 0x00003ff9 // log(1/frcpa(1+ 5/2^-8))
+
+data8 0xce95403a192f9f01 , 0x00003ff9 // log(1/frcpa(1+ 6/2^-8))
+data8 0xeb59392cbcc01096 , 0x00003ff9 // log(1/frcpa(1+ 7/2^-8))
+data8 0x862c7d0cefd54c5d , 0x00003ffa // log(1/frcpa(1+ 8/2^-8))
+data8 0x94aa63c65e70d499 , 0x00003ffa // log(1/frcpa(1+ 9/2^-8))
+data8 0xa54a696d4b62b382 , 0x00003ffa // log(1/frcpa(1+ 10/2^-8))
+
+data8 0xb3e4a796a5dac208 , 0x00003ffa // log(1/frcpa(1+ 11/2^-8))
+data8 0xc28c45b1878340a9 , 0x00003ffa // log(1/frcpa(1+ 12/2^-8))
+data8 0xd35c55f39d7a6235 , 0x00003ffa // log(1/frcpa(1+ 13/2^-8))
+data8 0xe220f037b954f1f5 , 0x00003ffa // log(1/frcpa(1+ 14/2^-8))
+data8 0xf0f3389b036834f3 , 0x00003ffa // log(1/frcpa(1+ 15/2^-8))
+
+data8 0xffd3488d5c980465 , 0x00003ffa // log(1/frcpa(1+ 16/2^-8))
+data8 0x87609ce2ed300490 , 0x00003ffb // log(1/frcpa(1+ 17/2^-8))
+data8 0x8ede9321e8c85927 , 0x00003ffb // log(1/frcpa(1+ 18/2^-8))
+data8 0x96639427f2f8e2f4 , 0x00003ffb // log(1/frcpa(1+ 19/2^-8))
+data8 0x9defad3e8f73217b , 0x00003ffb // log(1/frcpa(1+ 20/2^-8))
+
+data8 0xa582ebd50097029c , 0x00003ffb // log(1/frcpa(1+ 21/2^-8))
+data8 0xac06dbe75ab80fee , 0x00003ffb // log(1/frcpa(1+ 22/2^-8))
+data8 0xb3a78449b2d3ccca , 0x00003ffb // log(1/frcpa(1+ 23/2^-8))
+data8 0xbb4f79635ab46bb2 , 0x00003ffb // log(1/frcpa(1+ 24/2^-8))
+data8 0xc2fec93a83523f3f , 0x00003ffb // log(1/frcpa(1+ 25/2^-8))
+
+data8 0xc99af2eaca4c4571 , 0x00003ffb // log(1/frcpa(1+ 26/2^-8))
+data8 0xd1581106472fa653 , 0x00003ffb // log(1/frcpa(1+ 27/2^-8))
+data8 0xd8002560d4355f2e , 0x00003ffb // log(1/frcpa(1+ 28/2^-8))
+data8 0xdfcb43b4fe508632 , 0x00003ffb // log(1/frcpa(1+ 29/2^-8))
+data8 0xe67f6dff709d4119 , 0x00003ffb // log(1/frcpa(1+ 30/2^-8))
+
+data8 0xed393b1c22351280 , 0x00003ffb // log(1/frcpa(1+ 31/2^-8))
+data8 0xf5192bff087bcc35 , 0x00003ffb // log(1/frcpa(1+ 32/2^-8))
+data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb // log(1/frcpa(1+ 33/2^-8))
+data8 0x81559a97f92f9cc7 , 0x00003ffc // log(1/frcpa(1+ 34/2^-8))
+data8 0x84be72bce90266e8 , 0x00003ffc // log(1/frcpa(1+ 35/2^-8))
+
+data8 0x88bc74113f23def2 , 0x00003ffc // log(1/frcpa(1+ 36/2^-8))
+data8 0x8c2ba3edf6799d11 , 0x00003ffc // log(1/frcpa(1+ 37/2^-8))
+data8 0x8f9dc92f92ea08b1 , 0x00003ffc // log(1/frcpa(1+ 38/2^-8))
+data8 0x9312e8f36efab5a7 , 0x00003ffc // log(1/frcpa(1+ 39/2^-8))
+data8 0x968b08643409ceb6 , 0x00003ffc // log(1/frcpa(1+ 40/2^-8))
+
+data8 0x9a062cba08a1708c , 0x00003ffc // log(1/frcpa(1+ 41/2^-8))
+data8 0x9d845b3abf95485c , 0x00003ffc // log(1/frcpa(1+ 42/2^-8))
+data8 0xa06fd841bc001bb4 , 0x00003ffc // log(1/frcpa(1+ 43/2^-8))
+data8 0xa3f3a74652fbe0db , 0x00003ffc // log(1/frcpa(1+ 44/2^-8))
+data8 0xa77a8fb2336f20f5 , 0x00003ffc // log(1/frcpa(1+ 45/2^-8))
+
+data8 0xab0497015d28b0a0 , 0x00003ffc // log(1/frcpa(1+ 46/2^-8))
+data8 0xae91c2be6ba6a615 , 0x00003ffc // log(1/frcpa(1+ 47/2^-8))
+data8 0xb189d1b99aebb20b , 0x00003ffc // log(1/frcpa(1+ 48/2^-8))
+data8 0xb51cced5de9c1b2c , 0x00003ffc // log(1/frcpa(1+ 49/2^-8))
+data8 0xb819bee9e720d42f , 0x00003ffc // log(1/frcpa(1+ 50/2^-8))
+
+data8 0xbbb2a0947b093a5d , 0x00003ffc // log(1/frcpa(1+ 51/2^-8))
+data8 0xbf4ec1505811684a , 0x00003ffc // log(1/frcpa(1+ 52/2^-8))
+data8 0xc2535bacfa8975ff , 0x00003ffc // log(1/frcpa(1+ 53/2^-8))
+data8 0xc55a3eafad187eb8 , 0x00003ffc // log(1/frcpa(1+ 54/2^-8))
+data8 0xc8ff2484b2c0da74 , 0x00003ffc // log(1/frcpa(1+ 55/2^-8))
+
+data8 0xcc0b1a008d53ab76 , 0x00003ffc // log(1/frcpa(1+ 56/2^-8))
+data8 0xcfb6203844b3209b , 0x00003ffc // log(1/frcpa(1+ 57/2^-8))
+data8 0xd2c73949a47a19f5 , 0x00003ffc // log(1/frcpa(1+ 58/2^-8))
+data8 0xd5daae18b49d6695 , 0x00003ffc // log(1/frcpa(1+ 59/2^-8))
+data8 0xd8f08248cf7e8019 , 0x00003ffc // log(1/frcpa(1+ 60/2^-8))
+
+data8 0xdca7749f1b3e540e , 0x00003ffc // log(1/frcpa(1+ 61/2^-8))
+data8 0xdfc28e033aaaf7c7 , 0x00003ffc // log(1/frcpa(1+ 62/2^-8))
+data8 0xe2e012a5f91d2f55 , 0x00003ffc // log(1/frcpa(1+ 63/2^-8))
+data8 0xe600064ed9e292a8 , 0x00003ffc // log(1/frcpa(1+ 64/2^-8))
+data8 0xe9226cce42b39f60 , 0x00003ffc // log(1/frcpa(1+ 65/2^-8))
+
+data8 0xec4749fd97a28360 , 0x00003ffc // log(1/frcpa(1+ 66/2^-8))
+data8 0xef6ea1bf57780495 , 0x00003ffc // log(1/frcpa(1+ 67/2^-8))
+data8 0xf29877ff38809091 , 0x00003ffc // log(1/frcpa(1+ 68/2^-8))
+data8 0xf5c4d0b245cb89be , 0x00003ffc // log(1/frcpa(1+ 69/2^-8))
+data8 0xf8f3afd6fcdef3aa , 0x00003ffc // log(1/frcpa(1+ 70/2^-8))
+
+data8 0xfc2519756be1abc7 , 0x00003ffc // log(1/frcpa(1+ 71/2^-8))
+data8 0xff59119f503e6832 , 0x00003ffc // log(1/frcpa(1+ 72/2^-8))
+data8 0x8147ce381ae0e146 , 0x00003ffd // log(1/frcpa(1+ 73/2^-8))
+data8 0x82e45f06cb1ad0f2 , 0x00003ffd // log(1/frcpa(1+ 74/2^-8))
+data8 0x842f5c7c573cbaa2 , 0x00003ffd // log(1/frcpa(1+ 75/2^-8))
+
+data8 0x85ce471968c8893a , 0x00003ffd // log(1/frcpa(1+ 76/2^-8))
+data8 0x876e8305bc04066d , 0x00003ffd // log(1/frcpa(1+ 77/2^-8))
+data8 0x891012678031fbb3 , 0x00003ffd // log(1/frcpa(1+ 78/2^-8))
+data8 0x8a5f1493d766a05f , 0x00003ffd // log(1/frcpa(1+ 79/2^-8))
+data8 0x8c030c778c56fa00 , 0x00003ffd // log(1/frcpa(1+ 80/2^-8))
+
+data8 0x8da85df17e31d9ae , 0x00003ffd // log(1/frcpa(1+ 81/2^-8))
+data8 0x8efa663e7921687e , 0x00003ffd // log(1/frcpa(1+ 82/2^-8))
+data8 0x90a22b6875c6a1f8 , 0x00003ffd // log(1/frcpa(1+ 83/2^-8))
+data8 0x91f62cc8f5d24837 , 0x00003ffd // log(1/frcpa(1+ 84/2^-8))
+data8 0x93a06cfc3857d980 , 0x00003ffd // log(1/frcpa(1+ 85/2^-8))
+
+data8 0x94f66d5e6fd01ced , 0x00003ffd // log(1/frcpa(1+ 86/2^-8))
+data8 0x96a330156e6772f2 , 0x00003ffd // log(1/frcpa(1+ 87/2^-8))
+data8 0x97fb3582754ea25b , 0x00003ffd // log(1/frcpa(1+ 88/2^-8))
+data8 0x99aa8259aad1bbf2 , 0x00003ffd // log(1/frcpa(1+ 89/2^-8))
+data8 0x9b0492f6227ae4a8 , 0x00003ffd // log(1/frcpa(1+ 90/2^-8))
+
+data8 0x9c5f8e199bf3a7a5 , 0x00003ffd // log(1/frcpa(1+ 91/2^-8))
+data8 0x9e1293b9998c1daa , 0x00003ffd // log(1/frcpa(1+ 92/2^-8))
+data8 0x9f6fa31e0b41f308 , 0x00003ffd // log(1/frcpa(1+ 93/2^-8))
+data8 0xa0cda11eaf46390e , 0x00003ffd // log(1/frcpa(1+ 94/2^-8))
+data8 0xa22c8f029cfa45aa , 0x00003ffd // log(1/frcpa(1+ 95/2^-8))
+
+data8 0xa3e48badb7856b34 , 0x00003ffd // log(1/frcpa(1+ 96/2^-8))
+data8 0xa5459a0aa95849f9 , 0x00003ffd // log(1/frcpa(1+ 97/2^-8))
+data8 0xa6a79c84480cfebd , 0x00003ffd // log(1/frcpa(1+ 98/2^-8))
+data8 0xa80a946d0fcb3eb2 , 0x00003ffd // log(1/frcpa(1+ 99/2^-8))
+data8 0xa96e831a3ea7b314 , 0x00003ffd // log(1/frcpa(1+100/2^-8))
+
+data8 0xaad369e3dc544e3b , 0x00003ffd // log(1/frcpa(1+101/2^-8))
+data8 0xac92e9588952c815 , 0x00003ffd // log(1/frcpa(1+102/2^-8))
+data8 0xadfa035aa1ed8fdc , 0x00003ffd // log(1/frcpa(1+103/2^-8))
+data8 0xaf6219eae1ad6e34 , 0x00003ffd // log(1/frcpa(1+104/2^-8))
+data8 0xb0cb2e6d8160f753 , 0x00003ffd // log(1/frcpa(1+105/2^-8))
+
+data8 0xb2354249ad950f72 , 0x00003ffd // log(1/frcpa(1+106/2^-8))
+data8 0xb3a056e98ef4a3b4 , 0x00003ffd // log(1/frcpa(1+107/2^-8))
+data8 0xb50c6dba52c6292a , 0x00003ffd // log(1/frcpa(1+108/2^-8))
+data8 0xb679882c33876165 , 0x00003ffd // log(1/frcpa(1+109/2^-8))
+data8 0xb78c07429785cedc , 0x00003ffd // log(1/frcpa(1+110/2^-8))
+
+data8 0xb8faeb8dc4a77d24 , 0x00003ffd // log(1/frcpa(1+111/2^-8))
+data8 0xba6ad77eb36ae0d6 , 0x00003ffd // log(1/frcpa(1+112/2^-8))
+data8 0xbbdbcc915e9bee50 , 0x00003ffd // log(1/frcpa(1+113/2^-8))
+data8 0xbd4dcc44f8cf12ef , 0x00003ffd // log(1/frcpa(1+114/2^-8))
+data8 0xbec0d81bf5b531fa , 0x00003ffd // log(1/frcpa(1+115/2^-8))
+
+data8 0xc034f19c139186f4 , 0x00003ffd // log(1/frcpa(1+116/2^-8))
+data8 0xc14cb69f7c5e55ab , 0x00003ffd // log(1/frcpa(1+117/2^-8))
+data8 0xc2c2abbb6e5fd56f , 0x00003ffd // log(1/frcpa(1+118/2^-8))
+data8 0xc439b2c193e6771e , 0x00003ffd // log(1/frcpa(1+119/2^-8))
+data8 0xc553acb9d5c67733 , 0x00003ffd // log(1/frcpa(1+120/2^-8))
+
+data8 0xc6cc96e441272441 , 0x00003ffd // log(1/frcpa(1+121/2^-8))
+data8 0xc8469753eca88c30 , 0x00003ffd // log(1/frcpa(1+122/2^-8))
+data8 0xc962cf3ce072b05c , 0x00003ffd // log(1/frcpa(1+123/2^-8))
+data8 0xcadeba8771f694aa , 0x00003ffd // log(1/frcpa(1+124/2^-8))
+data8 0xcc5bc08d1f72da94 , 0x00003ffd // log(1/frcpa(1+125/2^-8))
+
+data8 0xcd7a3f99ea035c29 , 0x00003ffd // log(1/frcpa(1+126/2^-8))
+data8 0xcef93860c8a53c35 , 0x00003ffd // log(1/frcpa(1+127/2^-8))
+data8 0xd0192f68a7ed23df , 0x00003ffd // log(1/frcpa(1+128/2^-8))
+data8 0xd19a201127d3c645 , 0x00003ffd // log(1/frcpa(1+129/2^-8))
+data8 0xd2bb92f4061c172c , 0x00003ffd // log(1/frcpa(1+130/2^-8))
+
+data8 0xd43e80b2ee8cc8fc , 0x00003ffd // log(1/frcpa(1+131/2^-8))
+data8 0xd56173601fc4ade4 , 0x00003ffd // log(1/frcpa(1+132/2^-8))
+data8 0xd6e6637efb54086f , 0x00003ffd // log(1/frcpa(1+133/2^-8))
+data8 0xd80ad9f58f3c8193 , 0x00003ffd // log(1/frcpa(1+134/2^-8))
+data8 0xd991d1d31aca41f8 , 0x00003ffd // log(1/frcpa(1+135/2^-8))
+
+data8 0xdab7d02231484a93 , 0x00003ffd // log(1/frcpa(1+136/2^-8))
+data8 0xdc40d532cde49a54 , 0x00003ffd // log(1/frcpa(1+137/2^-8))
+data8 0xdd685f79ed8b265e , 0x00003ffd // log(1/frcpa(1+138/2^-8))
+data8 0xde9094bbc0e17b1d , 0x00003ffd // log(1/frcpa(1+139/2^-8))
+data8 0xe01c91b78440c425 , 0x00003ffd // log(1/frcpa(1+140/2^-8))
+
+data8 0xe14658f26997e729 , 0x00003ffd // log(1/frcpa(1+141/2^-8))
+data8 0xe270cdc2391e0d23 , 0x00003ffd // log(1/frcpa(1+142/2^-8))
+data8 0xe3ffce3a2aa64922 , 0x00003ffd // log(1/frcpa(1+143/2^-8))
+data8 0xe52bdb274ed82887 , 0x00003ffd // log(1/frcpa(1+144/2^-8))
+data8 0xe6589852e75d7df6 , 0x00003ffd // log(1/frcpa(1+145/2^-8))
+
+data8 0xe786068c79937a7d , 0x00003ffd // log(1/frcpa(1+146/2^-8))
+data8 0xe91903adad100911 , 0x00003ffd // log(1/frcpa(1+147/2^-8))
+data8 0xea481236f7d35bb0 , 0x00003ffd // log(1/frcpa(1+148/2^-8))
+data8 0xeb77d48c692e6b14 , 0x00003ffd // log(1/frcpa(1+149/2^-8))
+data8 0xeca84b83d7297b87 , 0x00003ffd // log(1/frcpa(1+150/2^-8))
+
+data8 0xedd977f4962aa158 , 0x00003ffd // log(1/frcpa(1+151/2^-8))
+data8 0xef7179a22f257754 , 0x00003ffd // log(1/frcpa(1+152/2^-8))
+data8 0xf0a450d139366ca7 , 0x00003ffd // log(1/frcpa(1+153/2^-8))
+data8 0xf1d7e0524ff9ffdb , 0x00003ffd // log(1/frcpa(1+154/2^-8))
+data8 0xf30c29036a8b6cae , 0x00003ffd // log(1/frcpa(1+155/2^-8))
+
+data8 0xf4412bc411ea8d92 , 0x00003ffd // log(1/frcpa(1+156/2^-8))
+data8 0xf576e97564c8619d , 0x00003ffd // log(1/frcpa(1+157/2^-8))
+data8 0xf6ad62fa1b5f172f , 0x00003ffd // log(1/frcpa(1+158/2^-8))
+data8 0xf7e499368b55c542 , 0x00003ffd // log(1/frcpa(1+159/2^-8))
+data8 0xf91c8d10abaffe22 , 0x00003ffd // log(1/frcpa(1+160/2^-8))
+
+data8 0xfa553f7018c966f3 , 0x00003ffd // log(1/frcpa(1+161/2^-8))
+data8 0xfb8eb13e185d802c , 0x00003ffd // log(1/frcpa(1+162/2^-8))
+data8 0xfcc8e3659d9bcbed , 0x00003ffd // log(1/frcpa(1+163/2^-8))
+data8 0xfe03d6d34d487fd2 , 0x00003ffd // log(1/frcpa(1+164/2^-8))
+data8 0xff3f8c7581e9f0ae , 0x00003ffd // log(1/frcpa(1+165/2^-8))
+
+data8 0x803e029e280173ae , 0x00003ffe // log(1/frcpa(1+166/2^-8))
+data8 0x80dca10cc52d0757 , 0x00003ffe // log(1/frcpa(1+167/2^-8))
+data8 0x817ba200632755a1 , 0x00003ffe // log(1/frcpa(1+168/2^-8))
+data8 0x821b05f3b01d6774 , 0x00003ffe // log(1/frcpa(1+169/2^-8))
+data8 0x82bacd623ff19d06 , 0x00003ffe // log(1/frcpa(1+170/2^-8))
+
+data8 0x835af8c88e7a8f47 , 0x00003ffe // log(1/frcpa(1+171/2^-8))
+data8 0x83c5f8299e2b4091 , 0x00003ffe // log(1/frcpa(1+172/2^-8))
+data8 0x8466cb43f3d87300 , 0x00003ffe // log(1/frcpa(1+173/2^-8))
+data8 0x850803a67c80ca4b , 0x00003ffe // log(1/frcpa(1+174/2^-8))
+data8 0x85a9a1d11a23b461 , 0x00003ffe // log(1/frcpa(1+175/2^-8))
+
+data8 0x864ba644a18e6e05 , 0x00003ffe // log(1/frcpa(1+176/2^-8))
+data8 0x86ee1182dcc432f7 , 0x00003ffe // log(1/frcpa(1+177/2^-8))
+data8 0x875a925d7e48c316 , 0x00003ffe // log(1/frcpa(1+178/2^-8))
+data8 0x87fdaa109d23aef7 , 0x00003ffe // log(1/frcpa(1+179/2^-8))
+data8 0x88a129ed4becfaf2 , 0x00003ffe // log(1/frcpa(1+180/2^-8))
+
+data8 0x89451278ecd7f9cf , 0x00003ffe // log(1/frcpa(1+181/2^-8))
+data8 0x89b29295f8432617 , 0x00003ffe // log(1/frcpa(1+182/2^-8))
+data8 0x8a572ac5a5496882 , 0x00003ffe // log(1/frcpa(1+183/2^-8))
+data8 0x8afc2d0ce3b2dadf , 0x00003ffe // log(1/frcpa(1+184/2^-8))
+data8 0x8b6a69c608cfd3af , 0x00003ffe // log(1/frcpa(1+185/2^-8))
+
+data8 0x8c101e106e899a83 , 0x00003ffe // log(1/frcpa(1+186/2^-8))
+data8 0x8cb63de258f9d626 , 0x00003ffe // log(1/frcpa(1+187/2^-8))
+data8 0x8d2539c5bd19e2b1 , 0x00003ffe // log(1/frcpa(1+188/2^-8))
+data8 0x8dcc0e064b29e6f1 , 0x00003ffe // log(1/frcpa(1+189/2^-8))
+data8 0x8e734f45d88357ae , 0x00003ffe // log(1/frcpa(1+190/2^-8))
+
+data8 0x8ee30cef034a20db , 0x00003ffe // log(1/frcpa(1+191/2^-8))
+data8 0x8f8b0515686d1d06 , 0x00003ffe // log(1/frcpa(1+192/2^-8))
+data8 0x90336bba039bf32f , 0x00003ffe // log(1/frcpa(1+193/2^-8))
+data8 0x90a3edd23d1c9d58 , 0x00003ffe // log(1/frcpa(1+194/2^-8))
+data8 0x914d0de2f5d61b32 , 0x00003ffe // log(1/frcpa(1+195/2^-8))
+
+data8 0x91be0c20d28173b5 , 0x00003ffe // log(1/frcpa(1+196/2^-8))
+data8 0x9267e737c06cd34a , 0x00003ffe // log(1/frcpa(1+197/2^-8))
+data8 0x92d962ae6abb1237 , 0x00003ffe // log(1/frcpa(1+198/2^-8))
+data8 0x9383fa6afbe2074c , 0x00003ffe // log(1/frcpa(1+199/2^-8))
+data8 0x942f0421651c1c4e , 0x00003ffe // log(1/frcpa(1+200/2^-8))
+
+data8 0x94a14a3845bb985e , 0x00003ffe // log(1/frcpa(1+201/2^-8))
+data8 0x954d133857f861e7 , 0x00003ffe // log(1/frcpa(1+202/2^-8))
+data8 0x95bfd96468e604c4 , 0x00003ffe // log(1/frcpa(1+203/2^-8))
+data8 0x9632d31cafafa858 , 0x00003ffe // log(1/frcpa(1+204/2^-8))
+data8 0x96dfaabd86fa1647 , 0x00003ffe // log(1/frcpa(1+205/2^-8))
+
+data8 0x9753261fcbb2a594 , 0x00003ffe // log(1/frcpa(1+206/2^-8))
+data8 0x9800c11b426b996d , 0x00003ffe // log(1/frcpa(1+207/2^-8))
+data8 0x9874bf4d45ae663c , 0x00003ffe // log(1/frcpa(1+208/2^-8))
+data8 0x99231f5ee9a74f79 , 0x00003ffe // log(1/frcpa(1+209/2^-8))
+data8 0x9997a18a56bcad28 , 0x00003ffe // log(1/frcpa(1+210/2^-8))
+
+data8 0x9a46c873a3267e79 , 0x00003ffe // log(1/frcpa(1+211/2^-8))
+data8 0x9abbcfc621eb6cb6 , 0x00003ffe // log(1/frcpa(1+212/2^-8))
+data8 0x9b310cb0d354c990 , 0x00003ffe // log(1/frcpa(1+213/2^-8))
+data8 0x9be14cf9e1b3515c , 0x00003ffe // log(1/frcpa(1+214/2^-8))
+data8 0x9c5710b8cbb73a43 , 0x00003ffe // log(1/frcpa(1+215/2^-8))
+
+data8 0x9ccd0abd301f399c , 0x00003ffe // log(1/frcpa(1+216/2^-8))
+data8 0x9d7e67f3bdce8888 , 0x00003ffe // log(1/frcpa(1+217/2^-8))
+data8 0x9df4ea81a99daa01 , 0x00003ffe // log(1/frcpa(1+218/2^-8))
+data8 0x9e6ba405a54514ba , 0x00003ffe // log(1/frcpa(1+219/2^-8))
+data8 0x9f1e21c8c7bb62b3 , 0x00003ffe // log(1/frcpa(1+220/2^-8))
+
+data8 0x9f956593f6b6355c , 0x00003ffe // log(1/frcpa(1+221/2^-8))
+data8 0xa00ce1092e5498c3 , 0x00003ffe // log(1/frcpa(1+222/2^-8))
+data8 0xa0c08309c4b912c1 , 0x00003ffe // log(1/frcpa(1+223/2^-8))
+data8 0xa1388a8c6faa2afa , 0x00003ffe // log(1/frcpa(1+224/2^-8))
+data8 0xa1b0ca7095b5f985 , 0x00003ffe // log(1/frcpa(1+225/2^-8))
+
+data8 0xa22942eb47534a00 , 0x00003ffe // log(1/frcpa(1+226/2^-8))
+data8 0xa2de62326449d0a3 , 0x00003ffe // log(1/frcpa(1+227/2^-8))
+data8 0xa357690f88bfe345 , 0x00003ffe // log(1/frcpa(1+228/2^-8))
+data8 0xa3d0a93f45169a4b , 0x00003ffe // log(1/frcpa(1+229/2^-8))
+data8 0xa44a22f7ffe65f30 , 0x00003ffe // log(1/frcpa(1+230/2^-8))
+
+data8 0xa500c5e5b4c1aa36 , 0x00003ffe // log(1/frcpa(1+231/2^-8))
+data8 0xa57ad064eb2ebbc2 , 0x00003ffe // log(1/frcpa(1+232/2^-8))
+data8 0xa5f5152dedf4384e , 0x00003ffe // log(1/frcpa(1+233/2^-8))
+data8 0xa66f9478856233ec , 0x00003ffe // log(1/frcpa(1+234/2^-8))
+data8 0xa6ea4e7cca02c32e , 0x00003ffe // log(1/frcpa(1+235/2^-8))
+
+data8 0xa765437325341ccf , 0x00003ffe // log(1/frcpa(1+236/2^-8))
+data8 0xa81e21e6c75b4020 , 0x00003ffe // log(1/frcpa(1+237/2^-8))
+data8 0xa899ab333fe2b9ca , 0x00003ffe // log(1/frcpa(1+238/2^-8))
+data8 0xa9157039c51ebe71 , 0x00003ffe // log(1/frcpa(1+239/2^-8))
+data8 0xa991713433c2b999 , 0x00003ffe // log(1/frcpa(1+240/2^-8))
+
+data8 0xaa0dae5cbcc048b3 , 0x00003ffe // log(1/frcpa(1+241/2^-8))
+data8 0xaa8a27ede5eb13ad , 0x00003ffe // log(1/frcpa(1+242/2^-8))
+data8 0xab06de228a9e3499 , 0x00003ffe // log(1/frcpa(1+243/2^-8))
+data8 0xab83d135dc633301 , 0x00003ffe // log(1/frcpa(1+244/2^-8))
+data8 0xac3fb076adc7fe7a , 0x00003ffe // log(1/frcpa(1+245/2^-8))
+
+data8 0xacbd3cbbe47988f1 , 0x00003ffe // log(1/frcpa(1+246/2^-8))
+data8 0xad3b06b1a5dc57c3 , 0x00003ffe // log(1/frcpa(1+247/2^-8))
+data8 0xadb90e94af887717 , 0x00003ffe // log(1/frcpa(1+248/2^-8))
+data8 0xae3754a218f7c816 , 0x00003ffe // log(1/frcpa(1+249/2^-8))
+data8 0xaeb5d9175437afa2 , 0x00003ffe // log(1/frcpa(1+250/2^-8))
+
+data8 0xaf349c322e9c7cee , 0x00003ffe // log(1/frcpa(1+251/2^-8))
+data8 0xafb39e30d1768d1c , 0x00003ffe // log(1/frcpa(1+252/2^-8))
+data8 0xb032df51c2c93116 , 0x00003ffe // log(1/frcpa(1+253/2^-8))
+data8 0xb0b25fd3e6035ad9 , 0x00003ffe // log(1/frcpa(1+254/2^-8))
+data8 0xb1321ff67cba178c , 0x00003ffe // log(1/frcpa(1+255/2^-8))
+ASM_SIZE_DIRECTIVE(log_table_2)
+
+
+.align 32
+.global log#
+.global log10#
+
+// Entry selector shared by both routines: log10 enters with p7=1, p8=0;
+// log (natural log) enters with p8=1, p7=0.
+
+.section .text
+.proc log10#
+.align 32
+
+log10:
+#ifdef _LIBC
+.global __ieee754_log10
+.type __ieee754_log10,@function
+__ieee754_log10:
+#endif
+{ .mfi
+ alloc r32=ar.pfs,1,15,4,0 // frame: 1 input, 15 locals, 4 outputs, 0 rotating
+ frcpa.s1 log_C,p9 = f1,f8 // log_C = reciprocal approximation of x (1.0 / f8)
+ cmp.eq.unc p7,p8 = r0, r0 // r0 == r0 always holds: p7=1 (log10), p8=0
+}
+{ .mfb
+ addl log_AD_1 = @ltoff(log_table_1), gp // linkage-table slot for log_table_1 (dereferenced after the branch)
+ fnorm.s1 log_NORM_f8 = f8 // normalized copy of x
+ br.sptk L(LOG_LOG10_X) // continue in the code shared with log
+}
+;;
+
+.endp log10
+ASM_SIZE_DIRECTIVE(log10)
+ASM_SIZE_DIRECTIVE(__ieee754_log10)
+
+
+.section .text
+.proc log#
+.align 32
+log:
+#ifdef _LIBC
+.global __ieee754_log
+.type __ieee754_log,@function
+__ieee754_log:
+#endif
+
+{ .mfi
+ alloc r32=ar.pfs,1,15,4,0 // frame: 1 input, 15 locals, 4 outputs, 0 rotating
+ frcpa.s1 log_C,p9 = f1,f8 // log_C = reciprocal approximation of x (1.0 / f8)
+ cmp.eq.unc p8,p7 = r0, r0 // r0 == r0 always holds: p8=1 (natural log), p7=0
+}
+{ .mfi
+ addl log_AD_1 = @ltoff(log_table_1), gp // linkage-table slot for log_table_1 (dereferenced below)
+ fnorm.s1 log_NORM_f8 = f8 // normalized copy of x
+ nop.i 999 // no branch needed: falls through into L(LOG_LOG10_X)
+}
+;;
+
+L(LOG_LOG10_X):
+// Setup common to log and log10: table address, constants, w = x - 1.
+{ .mfi
+ ld8 log_AD_1 = [log_AD_1] // fetch the actual address of log_table_1
+ fclass.m.unc p15,p0 = f8, 0x0b // Test for x=unorm
+ mov log_GR_fff9 = 0xfff9 // biased exponent 0xffff-6, i.e. 2^-6: small/large |w| threshold
+}
+{ .mfi
+ mov log_GR_half_exp = 0x0fffe // biased exponent 0xffff-1, i.e. 2^-1
+ fms.s1 log_w = f8,f1,f1 // w = x*1 - 1 = x - 1
+ mov log_GR_exp_17_ones = 0x1ffff // mask that keeps the 17 exponent bits of a getf.exp result
+}
+;;
+
+{ .mmi
+ getf.exp log_GR_signexp_f8 = f8 // If x unorm then must recompute
+ setf.exp log_half = log_GR_half_exp // Form 0.5 = -Q1
+ nop.i 999
+}
+;;
+
+{ .mmb
+ adds log_AD_2 = 0x30, log_AD_1 // second load pointer, 0x30 bytes past the first
+ mov log_GR_exp_16_ones = 0xffff // exponent bias, subtracted later to get the true exponent
+(p15) br.cond.spnt L(LOG_DENORM) // unorm x: take sign/exponent from the normalized copy
+}
+;;
+
+L(LOG_COMMON):
+{.mfi
+ ldfpd log_P5,log_P4 = [log_AD_1],16 // coefficient pair; pointer post-incremented by 16
+ fclass.m.unc p6,p0 = f8, 0xc3 // Test for x=nan
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones // drop sign, keep biased exponent
+}
+{.mfi
+ ldfpd log_P3,log_P2 = [log_AD_2],16
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfpd log_Q8,log_Q7 = [log_AD_1],16
+ fclass.m.unc p11,p0 = f8, 0x21 // Test for x=+inf
+ sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones // N = biased exponent - 0xffff
+}
+{ .mfi
+ ldfpd log_Q6,log_Q5 = [log_AD_2],16
+ nop.f 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ ldfpd log_Q4,log_Q3 = [log_AD_1],16
+ fma.s1 log_wsq = log_w, log_w, f0 // w^2
+ nop.i 999
+}
+{ .mfb
+ ldfpd log_Q2,log_Q1 = [log_AD_2],16 // NOTE(review): log_Q1 is not read below (log_half = -Q1 is used); log_P1 is read but never loaded here - presumably both alias one register, confirm in the alias list
+(p6) fma.d.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
+(p6) br.ret.spnt b0 // Exit for x=nan
+}
+;;
+
+
+{ .mfi
+ setf.sig log_int_Nfloat = log_GR_true_exp_f8 // N as an integer in an FP register (converted by fcvt.xf below)
+ fcmp.eq.s1 p10,p0 = log_NORM_f8, f1 // Test for x=+1.0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fms.s1 log_r = log_C,f8,f1 // r = C*x - 1
+(p11) br.ret.spnt b0 // Exit for x=+inf
+}
+;;
+
+
+{ .mmf
+ getf.sig log_GR_significand_f8 = log_NORM_f8 // 64-bit significand of normalized x
+ ldfe log_inv_ln10 = [log_AD_2],16
+ fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p10) fmerge.s f8 = f0, f0 // result is +0.0 when x == 1.0
+(p10) br.ret.spnt b0 // Exit for x=1.0
+;;
+}
+
+{ .mfi
+ getf.exp log_GR_signexp_w = log_w // sign+exponent of w, used for the small/large test
+ fclass.m.unc p12,p0 = f8, 0x3a // Test for x neg norm, unorm, inf
+ shl log_GR_index = log_GR_significand_f8,1 // shift out the leading significand bit
+}
+;;
+
+{ .mfi
+ ldfe log_log2 = [log_AD_2],16
+ fnma.s1 log_rp_q10 = log_half, log_wsq, log_w // w - 0.5*w^2
+ shr.u log_GR_index = log_GR_index,56 // keep the top 8 fraction bits: index 0..255
+}
+{ .mfb
+ nop.m 999
+ fma.s1 log_w3 = log_wsq, log_w, f0 // w^3
+(p6) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x=0
+;;
+}
+
+
+{ .mfi
+ and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w // biased exponent of w
+ fma.s1 log_w4 = log_wsq, log_wsq, f0 // w^4
+ nop.i 999
+}
+{ .mfb
+ shladd log_AD_2 = log_GR_index,4,log_AD_2 // log_AD_2 += index*16 (one 16-byte table entry per index)
+ fma.s1 log_rsq = log_r, log_r, f0 // r^2
+(p12) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x<0
+;;
+}
+
+{ .mfi
+ ldfe log_T = [log_AD_2] // T = table entry selected by the index
+ fma.s1 log_rp_p4 = log_P5, log_r, log_P4 // P4 + P5*r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_p32 = log_P3, log_r, log_P2 // P2 + P3*r
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_q7 = log_Q8, log_w, log_Q7 // Q7 + Q8*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_q65 = log_Q6, log_w, log_Q5 // Q5 + Q6*w
+ nop.i 999
+;;
+}
+
+// p13 <== large w log   (exponent of w >= that of 2^-6)
+// p14 <== small w log   (exponent of w <  that of 2^-6)
+{ .mfi
+(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff9
+ fma.s1 log_rp_q3 = log_Q4, log_w, log_Q3 // Q3 + Q4*w
+ nop.i 999
+;;
+}
+
+// p10 <== large w log10
+// p11 <== small w log10
+{ .mfi
+(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff9
+ fcvt.xf log_Nfloat = log_int_Nfloat // Nfloat = N converted to floating point
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_q21 = log_Q2, log_w3, log_rp_q10 // w - 0.5*w^2 + Q2*w^3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_rcube = log_rsq, log_r, f0 // r^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_p10 = log_rsq, log_P1, log_r // r + P1*r^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag on +denormal input
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32 // P2 + P3*r + P4*r^2 + P5*r^3
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_w6 = log_w3, log_w3, f0 // w^6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_Qlo = log_rp_q7, log_wsq, log_rp_q65 // Q5 + Q6*w + Q7*w^2 + Q8*w^3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_Qhi = log_rp_q3, log_w4, log_rp_q21 // w - 0.5*w^2 + Q2*w^3 + Q3*w^4 + Q4*w^5
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T // T + N*log(2)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_r2P_r = log_rp_p2, log_rcube, log_rp_p10 // r + P1*r^2 + ... + P5*r^6
+ nop.i 999 ;;
+}
+
+
+// small w, log <== p14: result is the w series Qhi + w^6*Qlo
+{ .mfi
+ nop.m 999
+(p14) fma.d f8 = log_Qlo, log_w6, log_Qhi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_Q = log_Qlo, log_w6, log_Qhi // same series kept in a register for the log10 small-w case
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 log_log10_hi = log_T_plus_Nlog2, log_inv_ln10,f0 // (T + N*log2) * (1/ln10)
+ nop.i 999 ;;
+}
+
+// large w, log <== p13: result is (T + N*log2) + r series
+.pred.rel "mutex",p13,p10
+{ .mfi
+ nop.m 999
+(p13) fadd.d f8 = log_T_plus_Nlog2, log_r2P_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 log_log10_lo = log_inv_ln10, log_r2P_r,f0 // (r series) * (1/ln10)
+ nop.i 999 ;;
+}
+
+
+// small w, log10 <== p11: result is (w series) * (1/ln10)
+{ .mfi
+ nop.m 999
+(p11) fma.d f8 = log_inv_ln10,log_Q,f0
+ nop.i 999 ;;
+}
+
+// large w, log10 <== p10: result is log10_hi + log10_lo
+{ .mfb
+ nop.m 999
+(p10) fma.d f8 = log_log10_hi, f1, log_log10_lo
+ br.ret.sptk b0 // normal return for every non-special input
+;;
+}
+
+L(LOG_DENORM):
+{ .mfb
+ getf.exp log_GR_signexp_f8 = log_NORM_f8 // unorm x: re-read sign/exponent from the normalized copy
+ nop.f 999
+ br.cond.sptk L(LOG_COMMON) // then rejoin the common path
+}
+;;
+
+L(LOG_ZERO_NEG):
+// Reached for x = +-0 and for negative x; builds the IEEE result and an error tag.
+// qnan snan inf norm unorm 0 -+
+// 0 0 0 0 0 1 11 0x7
+// 0 0 1 1 1 0 10 0x3a
+
+// Save x (f8) in f10
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8,f8
+ nop.i 999 ;;
+}
+
+// p8 p9 means ln(+-0) = -inf
+// p7 p10 means log10(+-0) = -inf
+
+// p13 means ln(negative)
+// p14 means log10(negative)
+
+
+{ .mfi
+ nop.m 999
+ fmerge.ns f6 = f1,f1 // Form -1.0
+ nop.i 999 ;;
+}
+
+// p9 means ln(+-0) = -inf
+// p10 means log10(+-0) = -inf
+// Log(+-0) = -inf
+
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p9,p0 = f10, 0x07
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p10,p0 = f10, 0x07
+ nop.i 999 ;;
+}
+
+
+// p13 ln(negative)
+// p14 log10(negative)
+
+// Log(-inf, -normal, -unnormal) = QNAN indefinite
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p13,p0 = f10, 0x3a
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p14,p0 = f10, 0x3a
+ nop.i 999 ;;
+}
+
+
+.pred.rel "mutex",p9,p10
+{ .mfi
+(p9) mov log_GR_tag = 2 // tag for ln(+-0); tag meanings are defined by __libm_error_support (not visible here)
+(p9) frcpa f8,p11 = f6,f0 // -1.0 / 0 -> f8 = -inf
+ nop.i 999
+}
+{ .mfi
+(p10) mov log_GR_tag = 8 // tag for log10(+-0)
+(p10) frcpa f8,p12 = f6,f0 // -1.0 / 0 -> f8 = -inf
+ nop.i 999 ;;
+}
+
+.pred.rel "mutex",p13,p14
+{ .mfi
+(p13) mov log_GR_tag = 3 // tag for ln(negative)
+(p13) frcpa f8,p11 = f0,f0 // 0 / 0 -> f8 = QNaN indefinite
+ nop.i 999
+}
+{ .mfb
+(p14) mov log_GR_tag = 9 // tag for log10(negative)
+(p14) frcpa f8,p12 = f0,f0 // 0 / 0 -> f8 = QNaN indefinite
+ br.cond.sptk __libm_error_region ;;
+}
+.endp log
+ASM_SIZE_DIRECTIVE(log)
+ASM_SIZE_DIRECTIVE(__ieee754_log)
+
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+
+
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+// Expects: f10 = original x, f8 = tentative result. Returns the handler's result in f8.
+// (1) Reserve a 64-byte frame; save ar.pfs and gp
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 slot: old sp - 32 (= new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Store the (unused for log) second argument; save b0
+{ .mmi
+ stfd [GR_Parameter_Y] = f1,16 // STORE Parameter 2 (1.0 placeholder) on stack; pointer advances to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store x and the tentative result, then call the handler
+{ .mib
+ stfd [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // point back at the Parameter 2 slot (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // recompute the result address after the call instead of reusing the pre-call register value
+};;
+
+// (4) Reload the result and unwind the frame
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_log10.c b/sysdeps/ia64/fpu/e_log10.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log10.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log10f.c b/sysdeps/ia64/fpu/e_log10f.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log10f.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log10l.c b/sysdeps/ia64/fpu/e_log10l.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log10l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S
new file mode 100644
index 0000000..1799e4c
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logf.S
@@ -0,0 +1,946 @@
+.file "logf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 3/01/00 Initial version
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 1/10/01 Improved speed, fixed flags for neg denormals
+//
+//
+// API
+//==============================================================
+// float logf(float)
+// float log10f(float)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Consider x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(frcpa(x) x/frcpa(x))
+// = log(1/frcpa(x)) + log(frcpa(x) x)
+// = -log(frcpa(x)) + log(frcpa(x) x)
+//
+// frcpa(x) = 2^-N frcpa(1.f1 f2 ... f63)
+//
+// -log(frcpa(x)) = -log(C)
+// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
+//
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+
+// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 + T + log(frcpa(x) x)
+//
+// Log(x) = +Nlog2 + T + log(C x)
+//
+// Cx = 1 + r
+//
+// Log(x) = +Nlog2 + T + log(1+r)
+// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
+//
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
+//
+// Implementation
+//===============
+// CASE 1: |x-1| >= 2^-8
+// C = frcpa(x)
+// r = C * x - 1
+//
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
+//
+// x = f * 2^n where f is 1.f_1f_2f_3....f_63
+// Nfloat = float(n) where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 8 (each T table entry below is one 8-byte double)
+// get the dxt table entry at index + offset = T
+//
+// result = (T + Nfloat * log(2)) + rseries
+//
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+// y_k = frcpa(x_k)
+// log(1/y_k) in quad and round to double
+
+// CASE 2: |x-1| < 2^-8
+// w = x - 1
+//
+// Form wseries = w + Q1*w^2 + Q2*w^3 + Q3*w^4
+//
+// result = wseries
+
+// Special values
+//==============================================================
+
+
+// log(+0) = -inf
+// log(-0) = -inf
+
+// log(+qnan) = +qnan
+// log(-qnan) = -qnan
+// log(+snan) = +qnan
+// log(-snan) = -qnan
+
+// log(-n) = QNAN Indefinite
+// log(-inf) = QNAN Indefinite
+
+// log(+inf) = +inf
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f9 -> f15, f32 -> f47
+
+// General registers used:
+// r32 -> r51
+
+// Predicate registers used:
+// p6 -> p15
+
+// p8 log base e
+// p6 log base e special
+// p9 used in the frcpa
+// p13 log base e large W
+// p14 log base e small w
+
+// p7 log base 10
+// p10 log base 10 large W
+// p11 log base 10 small w
+// p12 log base 10 special
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+log_int_Nfloat = f9 // presumably the integer exponent before conversion to Nfloat - confirm in the code body
+log_Nfloat = f10 // Nfloat = float(n), n the true unbiased exponent
+
+log_P3 = f11 // P1..P3: coefficients of rseries = r + P1*r^2 + P2*r^3 + P3*r^4
+log_P2 = f12
+log_P1 = f13
+log_inv_ln10 = f14 // NOTE(review): log_inv_ln10 is assigned again below (f46); confirm which register is intended and drop the other
+log_log2 = f15 // log(2), multiplied by Nfloat
+
+log_w = f32 // w = x - 1
+log_T = f33 // table value T = log(1/frcpa(1 + k/256))
+log_rp_p32 = f34
+log_rp_p2 = f35
+log_rp_p10 = f36
+log_rsq = f37 // presumably r^2
+log_T_plus_Nlog2 = f38 // T + Nfloat*log(2)
+log_r = f39 // r = C*x - 1
+log_C = f40 // C = frcpa(x)
+log_rp_q32 = f41
+log_rp_q2 = f42
+log_rp_q10 = f43
+log_wsq = f44 // presumably w^2
+log_Q = f45
+log_inv_ln10 = f46 // second assignment of log_inv_ln10 (see f14 above)
+log_NORM_f8 = f47 // presumably the normalized copy of the input f8
+
+// ===================================
+
+log_GR_exp_17_ones = r33
+log_GR_exp_16_ones = r34
+log_GR_exp_f8 = r35
+log_GR_signexp_f8 = r36
+log_GR_true_exp_f8 = r37
+log_GR_significand_f8 = r38
+log_GR_index = r39 // table index derived from f_1..f_8
+log_AD_1 = r40
+log_GR_signexp_w = r41
+log_GR_fff7 = r42 // presumably holds 0xfff7 (biased exponent of 2^-8), the CASE 1/CASE 2 threshold - confirm
+log_AD_2 = r43
+log_GR_exp_w = r44
+
+GR_SAVE_B0 = r45 // r45-r47: save slots used around the error-support call
+GR_SAVE_GP = r46
+GR_SAVE_PFS = r47
+
+GR_Parameter_X = r48 // r48-r51: top of the r32 -> r51 range; presumably the output registers for __libm_error_support - confirm against the alloc
+GR_Parameter_Y = r49
+GR_Parameter_RESULT = r50
+log_GR_tag = r51
+
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+log_table_1:
+ASM_TYPE_DIRECTIVE(log_table_1,@object)
+data8 0xbfd0001008f39d59 // p3: IEEE double, approximately -1/4 (r^4 coefficient of rseries)
+data8 0x3fd5556073e0c45a // p2: IEEE double, approximately +1/3 (r^3 coefficient of rseries)
+ASM_SIZE_DIRECTIVE(log_table_1)
+
+log_table_2:
+ASM_TYPE_DIRECTIVE(log_table_2,@object)
+data8 0xbfdffffffffaea15 // p1
+data8 0x3fdbcb7b1526e50e // 1/ln10
+data8 0x3fe62e42fefa39ef // Log(2)
+data8 0x0 // pad
+
+data8 0x3F60040155D5889E //log(1/frcpa(1+ 0/256)
+data8 0x3F78121214586B54 //log(1/frcpa(1+ 1/256)
+data8 0x3F841929F96832F0 //log(1/frcpa(1+ 2/256)
+data8 0x3F8C317384C75F06 //log(1/frcpa(1+ 3/256)
+data8 0x3F91A6B91AC73386 //log(1/frcpa(1+ 4/256)
+data8 0x3F95BA9A5D9AC039 //log(1/frcpa(1+ 5/256)
+data8 0x3F99D2A8074325F4 //log(1/frcpa(1+ 6/256)
+data8 0x3F9D6B2725979802 //log(1/frcpa(1+ 7/256)
+data8 0x3FA0C58FA19DFAAA //log(1/frcpa(1+ 8/256)
+data8 0x3FA2954C78CBCE1B //log(1/frcpa(1+ 9/256)
+data8 0x3FA4A94D2DA96C56 //log(1/frcpa(1+ 10/256)
+data8 0x3FA67C94F2D4BB58 //log(1/frcpa(1+ 11/256)
+data8 0x3FA85188B630F068 //log(1/frcpa(1+ 12/256)
+data8 0x3FAA6B8ABE73AF4C //log(1/frcpa(1+ 13/256)
+data8 0x3FAC441E06F72A9E //log(1/frcpa(1+ 14/256)
+data8 0x3FAE1E6713606D07 //log(1/frcpa(1+ 15/256)
+data8 0x3FAFFA6911AB9301 //log(1/frcpa(1+ 16/256)
+data8 0x3FB0EC139C5DA601 //log(1/frcpa(1+ 17/256)
+data8 0x3FB1DBD2643D190B //log(1/frcpa(1+ 18/256)
+data8 0x3FB2CC7284FE5F1C //log(1/frcpa(1+ 19/256)
+data8 0x3FB3BDF5A7D1EE64 //log(1/frcpa(1+ 20/256)
+data8 0x3FB4B05D7AA012E0 //log(1/frcpa(1+ 21/256)
+data8 0x3FB580DB7CEB5702 //log(1/frcpa(1+ 22/256)
+data8 0x3FB674F089365A7A //log(1/frcpa(1+ 23/256)
+data8 0x3FB769EF2C6B568D //log(1/frcpa(1+ 24/256)
+data8 0x3FB85FD927506A48 //log(1/frcpa(1+ 25/256)
+data8 0x3FB9335E5D594989 //log(1/frcpa(1+ 26/256)
+data8 0x3FBA2B0220C8E5F5 //log(1/frcpa(1+ 27/256)
+data8 0x3FBB0004AC1A86AC //log(1/frcpa(1+ 28/256)
+data8 0x3FBBF968769FCA11 //log(1/frcpa(1+ 29/256)
+data8 0x3FBCCFEDBFEE13A8 //log(1/frcpa(1+ 30/256)
+data8 0x3FBDA727638446A2 //log(1/frcpa(1+ 31/256)
+data8 0x3FBEA3257FE10F7A //log(1/frcpa(1+ 32/256)
+data8 0x3FBF7BE9FEDBFDE6 //log(1/frcpa(1+ 33/256)
+data8 0x3FC02AB352FF25F4 //log(1/frcpa(1+ 34/256)
+data8 0x3FC097CE579D204D //log(1/frcpa(1+ 35/256)
+data8 0x3FC1178E8227E47C //log(1/frcpa(1+ 36/256)
+data8 0x3FC185747DBECF34 //log(1/frcpa(1+ 37/256)
+data8 0x3FC1F3B925F25D41 //log(1/frcpa(1+ 38/256)
+data8 0x3FC2625D1E6DDF57 //log(1/frcpa(1+ 39/256)
+data8 0x3FC2D1610C86813A //log(1/frcpa(1+ 40/256)
+data8 0x3FC340C59741142E //log(1/frcpa(1+ 41/256)
+data8 0x3FC3B08B6757F2A9 //log(1/frcpa(1+ 42/256)
+data8 0x3FC40DFB08378003 //log(1/frcpa(1+ 43/256)
+data8 0x3FC47E74E8CA5F7C //log(1/frcpa(1+ 44/256)
+data8 0x3FC4EF51F6466DE4 //log(1/frcpa(1+ 45/256)
+data8 0x3FC56092E02BA516 //log(1/frcpa(1+ 46/256)
+data8 0x3FC5D23857CD74D5 //log(1/frcpa(1+ 47/256)
+data8 0x3FC6313A37335D76 //log(1/frcpa(1+ 48/256)
+data8 0x3FC6A399DABBD383 //log(1/frcpa(1+ 49/256)
+data8 0x3FC70337DD3CE41B //log(1/frcpa(1+ 50/256)
+data8 0x3FC77654128F6127 //log(1/frcpa(1+ 51/256)
+data8 0x3FC7E9D82A0B022D //log(1/frcpa(1+ 52/256)
+data8 0x3FC84A6B759F512F //log(1/frcpa(1+ 53/256)
+data8 0x3FC8AB47D5F5A310 //log(1/frcpa(1+ 54/256)
+data8 0x3FC91FE49096581B //log(1/frcpa(1+ 55/256)
+data8 0x3FC981634011AA75 //log(1/frcpa(1+ 56/256)
+data8 0x3FC9F6C407089664 //log(1/frcpa(1+ 57/256)
+data8 0x3FCA58E729348F43 //log(1/frcpa(1+ 58/256)
+data8 0x3FCABB55C31693AD //log(1/frcpa(1+ 59/256)
+data8 0x3FCB1E104919EFD0 //log(1/frcpa(1+ 60/256)
+data8 0x3FCB94EE93E367CB //log(1/frcpa(1+ 61/256)
+data8 0x3FCBF851C067555F //log(1/frcpa(1+ 62/256)
+data8 0x3FCC5C0254BF23A6 //log(1/frcpa(1+ 63/256)
+data8 0x3FCCC000C9DB3C52 //log(1/frcpa(1+ 64/256)
+data8 0x3FCD244D99C85674 //log(1/frcpa(1+ 65/256)
+data8 0x3FCD88E93FB2F450 //log(1/frcpa(1+ 66/256)
+data8 0x3FCDEDD437EAEF01 //log(1/frcpa(1+ 67/256)
+data8 0x3FCE530EFFE71012 //log(1/frcpa(1+ 68/256)
+data8 0x3FCEB89A1648B971 //log(1/frcpa(1+ 69/256)
+data8 0x3FCF1E75FADF9BDE //log(1/frcpa(1+ 70/256)
+data8 0x3FCF84A32EAD7C35 //log(1/frcpa(1+ 71/256)
+data8 0x3FCFEB2233EA07CD //log(1/frcpa(1+ 72/256)
+data8 0x3FD028F9C7035C1C //log(1/frcpa(1+ 73/256)
+data8 0x3FD05C8BE0D9635A //log(1/frcpa(1+ 74/256)
+data8 0x3FD085EB8F8AE797 //log(1/frcpa(1+ 75/256)
+data8 0x3FD0B9C8E32D1911 //log(1/frcpa(1+ 76/256)
+data8 0x3FD0EDD060B78081 //log(1/frcpa(1+ 77/256)
+data8 0x3FD122024CF0063F //log(1/frcpa(1+ 78/256)
+data8 0x3FD14BE2927AECD4 //log(1/frcpa(1+ 79/256)
+data8 0x3FD180618EF18ADF //log(1/frcpa(1+ 80/256)
+data8 0x3FD1B50BBE2FC63B //log(1/frcpa(1+ 81/256)
+data8 0x3FD1DF4CC7CF242D //log(1/frcpa(1+ 82/256)
+data8 0x3FD214456D0EB8D4 //log(1/frcpa(1+ 83/256)
+data8 0x3FD23EC5991EBA49 //log(1/frcpa(1+ 84/256)
+data8 0x3FD2740D9F870AFB //log(1/frcpa(1+ 85/256)
+data8 0x3FD29ECDABCDFA04 //log(1/frcpa(1+ 86/256)
+data8 0x3FD2D46602ADCCEE //log(1/frcpa(1+ 87/256)
+data8 0x3FD2FF66B04EA9D4 //log(1/frcpa(1+ 88/256)
+data8 0x3FD335504B355A37 //log(1/frcpa(1+ 89/256)
+data8 0x3FD360925EC44F5D //log(1/frcpa(1+ 90/256)
+data8 0x3FD38BF1C3337E75 //log(1/frcpa(1+ 91/256)
+data8 0x3FD3C25277333184 //log(1/frcpa(1+ 92/256)
+data8 0x3FD3EDF463C1683E //log(1/frcpa(1+ 93/256)
+data8 0x3FD419B423D5E8C7 //log(1/frcpa(1+ 94/256)
+data8 0x3FD44591E0539F49 //log(1/frcpa(1+ 95/256)
+data8 0x3FD47C9175B6F0AD //log(1/frcpa(1+ 96/256)
+data8 0x3FD4A8B341552B09 //log(1/frcpa(1+ 97/256)
+data8 0x3FD4D4F3908901A0 //log(1/frcpa(1+ 98/256)
+data8 0x3FD501528DA1F968 //log(1/frcpa(1+ 99/256)
+data8 0x3FD52DD06347D4F6 //log(1/frcpa(1+ 100/256)
+data8 0x3FD55A6D3C7B8A8A //log(1/frcpa(1+ 101/256)
+data8 0x3FD5925D2B112A59 //log(1/frcpa(1+ 102/256)
+data8 0x3FD5BF406B543DB2 //log(1/frcpa(1+ 103/256)
+data8 0x3FD5EC433D5C35AE //log(1/frcpa(1+ 104/256)
+data8 0x3FD61965CDB02C1F //log(1/frcpa(1+ 105/256)
+data8 0x3FD646A84935B2A2 //log(1/frcpa(1+ 106/256)
+data8 0x3FD6740ADD31DE94 //log(1/frcpa(1+ 107/256)
+data8 0x3FD6A18DB74A58C5 //log(1/frcpa(1+ 108/256)
+data8 0x3FD6CF31058670EC //log(1/frcpa(1+ 109/256)
+data8 0x3FD6F180E852F0BA //log(1/frcpa(1+ 110/256)
+data8 0x3FD71F5D71B894F0 //log(1/frcpa(1+ 111/256)
+data8 0x3FD74D5AEFD66D5C //log(1/frcpa(1+ 112/256)
+data8 0x3FD77B79922BD37E //log(1/frcpa(1+ 113/256)
+data8 0x3FD7A9B9889F19E2 //log(1/frcpa(1+ 114/256)
+data8 0x3FD7D81B037EB6A6 //log(1/frcpa(1+ 115/256)
+data8 0x3FD8069E33827231 //log(1/frcpa(1+ 116/256)
+data8 0x3FD82996D3EF8BCB //log(1/frcpa(1+ 117/256)
+data8 0x3FD85855776DCBFB //log(1/frcpa(1+ 118/256)
+data8 0x3FD8873658327CCF //log(1/frcpa(1+ 119/256)
+data8 0x3FD8AA75973AB8CF //log(1/frcpa(1+ 120/256)
+data8 0x3FD8D992DC8824E5 //log(1/frcpa(1+ 121/256)
+data8 0x3FD908D2EA7D9512 //log(1/frcpa(1+ 122/256)
+data8 0x3FD92C59E79C0E56 //log(1/frcpa(1+ 123/256)
+data8 0x3FD95BD750EE3ED3 //log(1/frcpa(1+ 124/256)
+data8 0x3FD98B7811A3EE5B //log(1/frcpa(1+ 125/256)
+data8 0x3FD9AF47F33D406C //log(1/frcpa(1+ 126/256)
+data8 0x3FD9DF270C1914A8 //log(1/frcpa(1+ 127/256)
+data8 0x3FDA0325ED14FDA4 //log(1/frcpa(1+ 128/256)
+data8 0x3FDA33440224FA79 //log(1/frcpa(1+ 129/256)
+data8 0x3FDA57725E80C383 //log(1/frcpa(1+ 130/256)
+data8 0x3FDA87D0165DD199 //log(1/frcpa(1+ 131/256)
+data8 0x3FDAAC2E6C03F896 //log(1/frcpa(1+ 132/256)
+data8 0x3FDADCCC6FDF6A81 //log(1/frcpa(1+ 133/256)
+data8 0x3FDB015B3EB1E790 //log(1/frcpa(1+ 134/256)
+data8 0x3FDB323A3A635948 //log(1/frcpa(1+ 135/256)
+data8 0x3FDB56FA04462909 //log(1/frcpa(1+ 136/256)
+data8 0x3FDB881AA659BC93 //log(1/frcpa(1+ 137/256)
+data8 0x3FDBAD0BEF3DB165 //log(1/frcpa(1+ 138/256)
+data8 0x3FDBD21297781C2F //log(1/frcpa(1+ 139/256)
+data8 0x3FDC039236F08819 //log(1/frcpa(1+ 140/256)
+data8 0x3FDC28CB1E4D32FD //log(1/frcpa(1+ 141/256)
+data8 0x3FDC4E19B84723C2 //log(1/frcpa(1+ 142/256)
+data8 0x3FDC7FF9C74554C9 //log(1/frcpa(1+ 143/256)
+data8 0x3FDCA57B64E9DB05 //log(1/frcpa(1+ 144/256)
+data8 0x3FDCCB130A5CEBB0 //log(1/frcpa(1+ 145/256)
+data8 0x3FDCF0C0D18F326F //log(1/frcpa(1+ 146/256)
+data8 0x3FDD232075B5A201 //log(1/frcpa(1+ 147/256)
+data8 0x3FDD490246DEFA6B //log(1/frcpa(1+ 148/256)
+data8 0x3FDD6EFA918D25CD //log(1/frcpa(1+ 149/256)
+data8 0x3FDD9509707AE52F //log(1/frcpa(1+ 150/256)
+data8 0x3FDDBB2EFE92C554 //log(1/frcpa(1+ 151/256)
+data8 0x3FDDEE2F3445E4AF //log(1/frcpa(1+ 152/256)
+data8 0x3FDE148A1A2726CE //log(1/frcpa(1+ 153/256)
+data8 0x3FDE3AFC0A49FF40 //log(1/frcpa(1+ 154/256)
+data8 0x3FDE6185206D516E //log(1/frcpa(1+ 155/256)
+data8 0x3FDE882578823D52 //log(1/frcpa(1+ 156/256)
+data8 0x3FDEAEDD2EAC990C //log(1/frcpa(1+ 157/256)
+data8 0x3FDED5AC5F436BE3 //log(1/frcpa(1+ 158/256)
+data8 0x3FDEFC9326D16AB9 //log(1/frcpa(1+ 159/256)
+data8 0x3FDF2391A2157600 //log(1/frcpa(1+ 160/256)
+data8 0x3FDF4AA7EE03192D //log(1/frcpa(1+ 161/256)
+data8 0x3FDF71D627C30BB0 //log(1/frcpa(1+ 162/256)
+data8 0x3FDF991C6CB3B379 //log(1/frcpa(1+ 163/256)
+data8 0x3FDFC07ADA69A910 //log(1/frcpa(1+ 164/256)
+data8 0x3FDFE7F18EB03D3E //log(1/frcpa(1+ 165/256)
+data8 0x3FE007C053C5002E //log(1/frcpa(1+ 166/256)
+data8 0x3FE01B942198A5A1 //log(1/frcpa(1+ 167/256)
+data8 0x3FE02F74400C64EB //log(1/frcpa(1+ 168/256)
+data8 0x3FE04360BE7603AD //log(1/frcpa(1+ 169/256)
+data8 0x3FE05759AC47FE34 //log(1/frcpa(1+ 170/256)
+data8 0x3FE06B5F1911CF52 //log(1/frcpa(1+ 171/256)
+data8 0x3FE078BF0533C568 //log(1/frcpa(1+ 172/256)
+data8 0x3FE08CD9687E7B0E //log(1/frcpa(1+ 173/256)
+data8 0x3FE0A10074CF9019 //log(1/frcpa(1+ 174/256)
+data8 0x3FE0B5343A234477 //log(1/frcpa(1+ 175/256)
+data8 0x3FE0C974C89431CE //log(1/frcpa(1+ 176/256)
+data8 0x3FE0DDC2305B9886 //log(1/frcpa(1+ 177/256)
+data8 0x3FE0EB524BAFC918 //log(1/frcpa(1+ 178/256)
+data8 0x3FE0FFB54213A476 //log(1/frcpa(1+ 179/256)
+data8 0x3FE114253DA97D9F //log(1/frcpa(1+ 180/256)
+data8 0x3FE128A24F1D9AFF //log(1/frcpa(1+ 181/256)
+data8 0x3FE1365252BF0865 //log(1/frcpa(1+ 182/256)
+data8 0x3FE14AE558B4A92D //log(1/frcpa(1+ 183/256)
+data8 0x3FE15F85A19C765B //log(1/frcpa(1+ 184/256)
+data8 0x3FE16D4D38C119FA //log(1/frcpa(1+ 185/256)
+data8 0x3FE18203C20DD133 //log(1/frcpa(1+ 186/256)
+data8 0x3FE196C7BC4B1F3B //log(1/frcpa(1+ 187/256)
+data8 0x3FE1A4A738B7A33C //log(1/frcpa(1+ 188/256)
+data8 0x3FE1B981C0C9653D //log(1/frcpa(1+ 189/256)
+data8 0x3FE1CE69E8BB106B //log(1/frcpa(1+ 190/256)
+data8 0x3FE1DC619DE06944 //log(1/frcpa(1+ 191/256)
+data8 0x3FE1F160A2AD0DA4 //log(1/frcpa(1+ 192/256)
+data8 0x3FE2066D7740737E //log(1/frcpa(1+ 193/256)
+data8 0x3FE2147DBA47A394 //log(1/frcpa(1+ 194/256)
+data8 0x3FE229A1BC5EBAC3 //log(1/frcpa(1+ 195/256)
+data8 0x3FE237C1841A502E //log(1/frcpa(1+ 196/256)
+data8 0x3FE24CFCE6F80D9A //log(1/frcpa(1+ 197/256)
+data8 0x3FE25B2C55CD5762 //log(1/frcpa(1+ 198/256)
+data8 0x3FE2707F4D5F7C41 //log(1/frcpa(1+ 199/256)
+data8 0x3FE285E0842CA384 //log(1/frcpa(1+ 200/256)
+data8 0x3FE294294708B773 //log(1/frcpa(1+ 201/256)
+data8 0x3FE2A9A2670AFF0C //log(1/frcpa(1+ 202/256)
+data8 0x3FE2B7FB2C8D1CC1 //log(1/frcpa(1+ 203/256)
+data8 0x3FE2C65A6395F5F5 //log(1/frcpa(1+ 204/256)
+data8 0x3FE2DBF557B0DF43 //log(1/frcpa(1+ 205/256)
+data8 0x3FE2EA64C3F97655 //log(1/frcpa(1+ 206/256)
+data8 0x3FE3001823684D73 //log(1/frcpa(1+ 207/256)
+data8 0x3FE30E97E9A8B5CD //log(1/frcpa(1+ 208/256)
+data8 0x3FE32463EBDD34EA //log(1/frcpa(1+ 209/256)
+data8 0x3FE332F4314AD796 //log(1/frcpa(1+ 210/256)
+data8 0x3FE348D90E7464D0 //log(1/frcpa(1+ 211/256)
+data8 0x3FE35779F8C43D6E //log(1/frcpa(1+ 212/256)
+data8 0x3FE36621961A6A99 //log(1/frcpa(1+ 213/256)
+data8 0x3FE37C299F3C366A //log(1/frcpa(1+ 214/256)
+data8 0x3FE38AE2171976E7 //log(1/frcpa(1+ 215/256)
+data8 0x3FE399A157A603E7 //log(1/frcpa(1+ 216/256)
+data8 0x3FE3AFCCFE77B9D1 //log(1/frcpa(1+ 217/256)
+data8 0x3FE3BE9D503533B5 //log(1/frcpa(1+ 218/256)
+data8 0x3FE3CD7480B4A8A3 //log(1/frcpa(1+ 219/256)
+data8 0x3FE3E3C43918F76C //log(1/frcpa(1+ 220/256)
+data8 0x3FE3F2ACB27ED6C7 //log(1/frcpa(1+ 221/256)
+data8 0x3FE4019C2125CA93 //log(1/frcpa(1+ 222/256)
+data8 0x3FE4181061389722 //log(1/frcpa(1+ 223/256)
+data8 0x3FE42711518DF545 //log(1/frcpa(1+ 224/256)
+data8 0x3FE436194E12B6BF //log(1/frcpa(1+ 225/256)
+data8 0x3FE445285D68EA69 //log(1/frcpa(1+ 226/256)
+data8 0x3FE45BCC464C893A //log(1/frcpa(1+ 227/256)
+data8 0x3FE46AED21F117FC //log(1/frcpa(1+ 228/256)
+data8 0x3FE47A1527E8A2D3 //log(1/frcpa(1+ 229/256)
+data8 0x3FE489445EFFFCCC //log(1/frcpa(1+ 230/256)
+data8 0x3FE4A018BCB69835 //log(1/frcpa(1+ 231/256)
+data8 0x3FE4AF5A0C9D65D7 //log(1/frcpa(1+ 232/256)
+data8 0x3FE4BEA2A5BDBE87 //log(1/frcpa(1+ 233/256)
+data8 0x3FE4CDF28F10AC46 //log(1/frcpa(1+ 234/256)
+data8 0x3FE4DD49CF994058 //log(1/frcpa(1+ 235/256)
+data8 0x3FE4ECA86E64A684 //log(1/frcpa(1+ 236/256)
+data8 0x3FE503C43CD8EB68 //log(1/frcpa(1+ 237/256)
+data8 0x3FE513356667FC57 //log(1/frcpa(1+ 238/256)
+data8 0x3FE522AE0738A3D8 //log(1/frcpa(1+ 239/256)
+data8 0x3FE5322E26867857 //log(1/frcpa(1+ 240/256)
+data8 0x3FE541B5CB979809 //log(1/frcpa(1+ 241/256)
+data8 0x3FE55144FDBCBD62 //log(1/frcpa(1+ 242/256)
+data8 0x3FE560DBC45153C7 //log(1/frcpa(1+ 243/256)
+data8 0x3FE5707A26BB8C66 //log(1/frcpa(1+ 244/256)
+data8 0x3FE587F60ED5B900 //log(1/frcpa(1+ 245/256)
+data8 0x3FE597A7977C8F31 //log(1/frcpa(1+ 246/256)
+data8 0x3FE5A760D634BB8B //log(1/frcpa(1+ 247/256)
+data8 0x3FE5B721D295F10F //log(1/frcpa(1+ 248/256)
+data8 0x3FE5C6EA94431EF9 //log(1/frcpa(1+ 249/256)
+data8 0x3FE5D6BB22EA86F6 //log(1/frcpa(1+ 250/256)
+data8 0x3FE5E6938645D390 //log(1/frcpa(1+ 251/256)
+data8 0x3FE5F673C61A2ED2 //log(1/frcpa(1+ 252/256)
+data8 0x3FE6065BEA385926 //log(1/frcpa(1+ 253/256)
+data8 0x3FE6164BFA7CC06B //log(1/frcpa(1+ 254/256)
+data8 0x3FE62643FECF9743 //log(1/frcpa(1+ 255/256)
+ASM_SIZE_DIRECTIVE(log_table_2)
+
+
+.align 32
+.global logf#
+.global log10f#
+
+// log10 has p7 true, p8 false
+// log has p8 true, p7 false
+
+.section .text
+.proc log10f#
+.align 32
+// log10f entry: select the log10 path (p7 true, p8 false), start the reciprocal and the normalization of x (f8), then join the code shared with logf.
+log10f:
+#ifdef _LIBC
+.global __ieee754_log10f
+.type __ieee754_log10f,@function
+__ieee754_log10f:
+#endif
+{ .mfi
+ alloc r32=ar.pfs,1,15,4,0 // frame: 1 input, 15 locals, 4 outputs, 0 rotating; old ar.pfs kept in r32
+ frcpa.s1 log_C,p9 = f1,f8 // log_C = reciprocal approximation of x (1/f8); p9 receives the frcpa predicate result
+ cmp.eq.unc p7,p8 = r0, r0 // r0 == r0 always holds, so p7 = 1 (log10) and p8 = 0
+}
+{ .mfb
+ addl log_AD_1 = @ltoff(log_table_1), gp // address of the linkage-table slot for log_table_1
+ fnorm.s1 log_NORM_f8 = f8 // normalized copy of x
+ br.sptk L(LOG_LOG10_X) // continue in the code shared with logf
+}
+;;
+
+.endp log10f
+ASM_SIZE_DIRECTIVE(log10f)
+ASM_SIZE_DIRECTIVE(__ieee754_log10f)
+
+
+
+.section .text
+.proc logf#
+.align 32
+logf:
+#ifdef _LIBC
+.global __ieee754_logf
+.type __ieee754_logf,@function
+__ieee754_logf:
+#endif
+// logf entry: select the natural-log path (p8 true, p7 false), start the reciprocal and the normalization of x (f8), then fall through into LOG_LOG10_X.
+{ .mfi
+ alloc r32=ar.pfs,1,15,4,0 // frame: 1 input, 15 locals, 4 outputs, 0 rotating; old ar.pfs kept in r32
+ frcpa.s1 log_C,p9 = f1,f8 // log_C = reciprocal approximation of x (1/f8); p9 receives the frcpa predicate result
+ cmp.eq.unc p8,p7 = r0, r0 // r0 == r0 always holds, so p8 = 1 (log) and p7 = 0
+}
+{ .mfi
+ addl log_AD_1 = @ltoff(log_table_1), gp // address of the linkage-table slot for log_table_1
+ fnorm.s1 log_NORM_f8 = f8 // normalized copy of x
+ nop.i 999
+}
+;;
+
+L(LOG_LOG10_X):
+// Shared start for logf and log10f: extract sign/exponent/significand of x, form w = x - 1, and divert unnormal inputs to LOG_DENORM.
+{ .mfi
+ getf.exp log_GR_signexp_f8 = f8 // If x unorm then must recompute
+ fclass.m.unc p15,p0 = f8, 0x0b // Test for x=unorm
+ mov log_GR_fff7 = 0xfff7 // exponent threshold later compared with the exponent of w
+}
+{ .mfi
+ ld8 log_AD_1 = [log_AD_1] // load the table address out of the linkage-table slot
+ fms.s1 log_w = f8,f1,f1 // w = x*1 - 1 = x - 1
+ mov log_GR_exp_17_ones = 0x1ffff // mask covering the 17-bit exponent field
+}
+;;
+
+{ .mmi
+ getf.sig log_GR_significand_f8 = f8 // If x unorm then must recompute
+ mov log_GR_exp_16_ones = 0xffff // subtracted from the biased exponent to get the true exponent
+ nop.i 999
+}
+;;
+
+{ .mmb
+ adds log_AD_2 = 0x10, log_AD_1 // log_AD_2 = log_AD_1 + 16
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones // drop the sign bit, keep the biased exponent
+(p15) br.cond.spnt L(LOG_DENORM) // unnormal x: redo the extraction from the normalized copy
+}
+;;
+
+L(LOG_COMMON): // Main path: load constants, exit early for nan/+inf/1.0, branch out for zero or negative x, then evaluate a cubic in w (small w) or in r plus T + N*log2 (large w)
+{.mfi
+ ldfpd log_P3,log_P2 = [log_AD_1],16 // load P3 and P2; pointer post-incremented by 16
+ fclass.m.unc p6,p0 = f8, 0xc3 // Test for x=nan
+ shl log_GR_index = log_GR_significand_f8,1 // shift out the explicit leading significand bit
+}
+{.mfi
+ sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones // true exponent = biased exponent - 0xffff
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfpd log_P1,log_inv_ln10 = [log_AD_2],16 // load P1 and log_inv_ln10; pointer post-incremented by 16
+ fclass.m.unc p11,p0 = f8, 0x21 // Test for x=+inf
+ shr.u log_GR_index = log_GR_index,56 // keep the top 8 fraction bits as the table index
+}
+{ .mfi
+ setf.sig log_int_Nfloat = log_GR_true_exp_f8 // true exponent N placed in an FP register as an integer
+ nop.f 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ ldfd log_log2 = [log_AD_2],16 // load log_log2; pointer post-incremented by 16
+ fma.s1 log_wsq = log_w, log_w, f0 // wsq = w*w
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
+(p6) br.ret.spnt b0 // Exit for x=nan
+}
+;;
+
+
+{ .mfi
+ shladd log_AD_2 = log_GR_index,3,log_AD_2 // log_AD_2 += 8*index (8-byte table entries)
+ fcmp.eq.s1 p10,p0 = log_NORM_f8, f1 // Test for x=+1.0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fms.s1 log_r = log_C,f8,f1 // r = C*x - 1, where C is the frcpa result from the entry bundle
+(p11) br.ret.spnt b0 // Exit for x=+inf
+}
+;;
+
+
+{ .mmf
+ nop.m 999
+ nop.m 999
+ fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
+}
+;;
+
+
+{ .mfb
+ ldfd log_T = [log_AD_2] // T = table entry selected by the index
+(p10) fmerge.s f8 = f0, f0 // result is +0.0 when x=1.0
+(p10) br.ret.spnt b0 // Exit for x=1.0
+;;
+}
+
+{ .mfi
+ getf.exp log_GR_signexp_w = log_w // sign and exponent of w = x - 1
+ fclass.m.unc p12,p0 = f8, 0x3a // Test for x neg norm, unorm, inf
+ nop.i 999
+}
+;;
+
+{ .mmb
+ nop.m 999
+ nop.m 999
+(p6) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x=0
+;;
+}
+
+
+{ .mfi
+ and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w // biased exponent of w with the sign bit removed
+ nop.f 999
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 log_rsq = log_r, log_r, f0 // rsq = r*r
+(p12) br.cond.spnt L(LOG_ZERO_NEG) // Branch if x<0
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_p32 = log_P3, log_r, log_P2 // P3*r + P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_q32 = log_P3, log_w, log_P2 // P3*w + P2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf log_Nfloat = log_int_Nfloat // N converted from integer to floating point
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_p10 = log_P1, log_r, f1 // P1*r + 1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_q10 = log_P1, log_w, f1 // P1*w + 1
+ nop.i 999
+;;
+}
+
+// p13 <== large w log
+// p14 <== small w log
+{ .mfi
+(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff7 // logf only: p13 if exponent of w >= 0xfff7, otherwise p14
+ fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag on +denormal input
+ nop.i 999
+;;
+}
+
+// p10 <== large w log10
+// p11 <== small w log10
+{ .mfi
+(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff7 // log10f only: p10 if exponent of w >= 0xfff7, otherwise p11
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T // N*log2 + T
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10 // 1 + P1*r + P2*r^2 + P3*r^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 log_rp_q2 = log_rp_q32, log_wsq, log_rp_q10 // 1 + P1*w + P2*w^2 + P3*w^3
+ nop.i 999
+;;
+}
+
+
+// small w, log <== p14
+{ .mfi
+ nop.m 999
+(p14) fma.s f8 = log_rp_q2, log_w, f0 // logf result = w * (cubic in w), rounded to single
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 log_Q = log_rp_q2, log_w, f0 // log10f: Q = w * (cubic in w)
+ nop.i 999 ;;
+}
+
+
+// large w, log <== p13
+.pred.rel "mutex",p13,p10
+{ .mfi
+ nop.m 999
+(p13) fma.s f8 = log_rp_p2, log_r, log_T_plus_Nlog2 // logf result = r * (cubic in r) + T + N*log2, rounded to single
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 log_Q = log_rp_p2, log_r, log_T_plus_Nlog2 // log10f: Q = r * (cubic in r) + T + N*log2
+ nop.i 999 ;;
+}
+
+
+// log10f only (p7 is false for logf): scale Q by log_inv_ln10
+{ .mfb
+ nop.m 999
+(p7) fma.s f8 = log_inv_ln10,log_Q,f0 // log10f result = log_inv_ln10 * Q, rounded to single
+ br.ret.sptk b0
+;;
+}
+
+
+L(LOG_DENORM): // Unnormal x: recompute exponent and significand from the normalized copy, then rejoin LOG_COMMON
+{ .mmi
+ getf.exp log_GR_signexp_f8 = log_NORM_f8 // sign and exponent of the normalized x
+ nop.m 999
+ nop.i 999
+}
+;;
+{ .mmb
+ getf.sig log_GR_significand_f8 = log_NORM_f8 // significand of the normalized x
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones // drop the sign bit, keep the biased exponent
+ br.cond.sptk L(LOG_COMMON)
+}
+;;
+
+L(LOG_ZERO_NEG):
+// Error path for x = +-0 and x < 0: build the default result in f8, pick a tag per function and case, then branch to __libm_error_region.
+// fclass masks used below (bit order: qnan snan inf norm unorm 0 -+):
+// 0x07 = zero of either sign
+// 0x3a = negative inf, norm or unorm
+
+// Save x (f8) in f10
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8,f8
+ nop.i 999 ;;
+}
+
+// p8 p9 means logf(+-0) = -inf
+// p7 p10 means log10f(+-0) = -inf
+
+// p13 means logf(negative)
+// p14 means log10f(negative)
+
+
+{ .mfi
+ nop.m 999
+ fmerge.ns f6 = f1,f1 // Form -1.0
+ nop.i 999 ;;
+}
+
+// p9 means logf(+-0) = -inf
+// p10 means log10f(+-0) = -inf
+// Log(+-0) = -inf
+
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p9,p0 = f10, 0x07 // logf path: p9 = (x is +-0)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p10,p0 = f10, 0x07 // log10f path: p10 = (x is +-0)
+ nop.i 999 ;;
+}
+
+
+// p13 logf(negative)
+// p14 log10f(negative)
+
+// Log(-inf, -normal, -unnormal) = QNAN indefinite
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p13,p0 = f10, 0x3a // logf path: p13 = (x is negative inf/norm/unorm)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p14,p0 = f10, 0x3a // log10f path: p14 = (x is negative inf/norm/unorm)
+ nop.i 999 ;;
+}
+
+
+.pred.rel "mutex",p9,p10
+{ .mfi
+(p9) mov log_GR_tag = 4 // tag for logf(+-0); presumably the error code read by __libm_error_support - confirm in libm_support.h
+(p9) frcpa f8,p11 = f6,f0 // f8 = -1.0/0 = -inf
+ nop.i 999
+}
+{ .mfi
+(p10) mov log_GR_tag = 10 // tag for log10f(+-0)
+(p10) frcpa f8,p12 = f6,f0 // f8 = -1.0/0 = -inf
+ nop.i 999 ;;
+}
+
+.pred.rel "mutex",p13,p14
+{ .mfi
+(p13) mov log_GR_tag = 5 // tag for logf(negative)
+(p13) frcpa f8,p11 = f0,f0 // f8 = 0/0 = QNaN indefinite
+ nop.i 999
+}
+{ .mfb
+(p14) mov log_GR_tag = 11 // tag for log10f(negative)
+(p14) frcpa f8,p12 = f0,f0 // f8 = 0/0 = QNaN indefinite
+ br.cond.sptk __libm_error_region ;;
+}
+.endp logf
+ASM_SIZE_DIRECTIVE(logf)
+ASM_SIZE_DIRECTIVE(__ieee754_logf)
+
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+
+
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+// Error stub: open a 64-byte frame, store x (f10), 1.0 (f1) and the default result (f8) on the stack, call __libm_error_support, then reload f8 from the result slot and return to the caller of logf/log10f.
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 (f1 = 1.0) on stack; pointer post-incremented by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // point back at Parameter 2 (undo the post-increment)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // recompute the result address (sp + 48) after the call
+};;
+
+// (4)
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_logl.c b/sysdeps/ia64/fpu/e_logl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S
new file mode 100644
index 0000000..acc3ed8
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_pow.S
@@ -0,0 +1,2309 @@
+.file "pow.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/03/00 Added p12 to definite over/under path. With odd power we did not
+// maintain the sign of x in this path.
+// 4/04/00 Unwind support added
+// 4/19/00 pow(+-1,inf) now returns NaN
+// pow(+-val, +-inf) returns 0 or inf, but now does not call error support
+// Added s1 to fcvt.fx because invalid flag was incorrectly set.
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 9/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// and tweaking the critical path
+// 9/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 9/28/00 Updated NaN**0 path
+// 1/20/01 Fixed denormal flag settings.
+// 2/12/01 Improved speed.
+//
+// API
+//==============================================================
+// double pow(double, double)
+// float powf(float, float)
+//
+// Overview of operation
+//==============================================================
+//
+// Three steps...
+// 1. Log(x)
+// 2. y Log(x)
+// 3. exp(y log(x))
+//
+// This means we work with the absolute value of x and merge in the sign later.
+// Log(x) = G + delta + r -rsq/2 + p
+// G,delta depend on the exponent of x and table entries. The table entries are
+// indexed by the exponent of x, called K.
+//
+// The G and delta come out of the reduction; r is the reduced x.
+//
+// B = frcpa(x)
+// xB-1 is small means that B is the approximate inverse of x.
+//
+// Log(x) = Log( (1/B)(Bx) )
+// = Log(1/B) + Log(Bx)
+// = Log(1/B) + Log( 1 + (Bx-1))
+//
+// x = 2^K 1.x_1x_2.....x_52
+// B = frcpa(x) = 2^-K Cm
+// Log(1/B) = Log(1/(2^-K Cm))
+// Log(1/B) = Log((2^K/ Cm))
+// Log(1/B) = K Log(2) + Log(1/Cm)
+//
+// Log(x) = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
+//
+// If you take the significand of x, set the exponent to true 0, then Cm is
+// the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
+// The frcpa table is indexed by 8 bits, the x_1 thru x_8.
+// m = x_1x_2...x_8 is an 8-bit index.
+//
+// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
+//
+// We tabulate as two doubles, T and t, where T + t is the value itself.
+//
+// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo + t) + Log( 1 + (Bx-1))
+// Log(x) = G + delta + Log( 1 + (Bx-1))
+//
+// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
+//
+// Log( 1 + (Bx-1)) = r - rsq/2 + p
+//
+// Then,
+//
+// yLog(x) = yG + y delta + y(r-rsq/2) + yp
+// yLog(x) = Z1 + e3 + Z2 + Z3 + (e1 + e2)
+//
+//
+// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+//
+//
+// exp(Z3) is another series.
+// exp(e1 + e2 + e3) is approximated as f3 = 1 + (e1 + e2 + e3)
+//
+// Z1 (128/log2) = number of log2/128 in Z1 is N1
+// Z2 (128/log2) = number of log2/128 in Z2 is N2
+//
+// s1 = Z1 - N1 log2/128
+// s2 = Z2 - N2 log2/128
+//
+// s = s1 + s2
+// N = N1 + N2
+//
+// exp(Z1 + Z2) = exp(Z)
+// exp(Z) = exp(s) exp(N log2/128)
+//
+// exp(r) = exp(Z - N log2/128)
+//
+// r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
+// = Z - N (log2/128)
+//
+// Z = s+d +N (log2/128)
+//
+// exp(Z) = exp(s) (1+d) exp(N log2/128)
+//
+// N = M 128 + n
+//
+// N log2/128 = M log2 + n log2/128
+//
+// n is 7 binary digits = n_7n_6...n_1
+//
+// n log2/128 = n_7n_6n_5 16 log2/128 + n_4n_3n_2n_1 log2/128
+// n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
+// n log2/128 = I2 log2/8 + I1 log2/128
+//
+// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+//
+// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
+// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
+// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
+//
+// I1, I2 are table indices. Use a series for exp(s).
+// Then get exp(Z)
+//
+// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+// exp(yLog(x)) = exp(Z) exp(Z3) f3
+// exp(yLog(x)) = exp(Z)f3 exp(Z3)
+// exp(yLog(x)) = A exp(Z3)
+//
+// We actually calculate exp(Z3) -1.
+// Then,
+// exp(yLog(x)) = A + A( exp(Z3) -1)
+//
+
+// Table Generation
+//==============================================================
+
+// The log values
+// ==============
+// The operation (K*log2_hi) must be exact. K is the true exponent of x.
+// If we allow gradual underflow (denormals), K can be represented in 12 bits
+// (as a two's complement number). We assume 13 bits as an engineering precaution.
+//
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+// 0 1 66
+// 2 34
+//
+// So we want the lsb(log2_hi) to be 2^-50
+// We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
+//
+// 0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
+//
+// Consider numbering the bits left to right, starting at 0 thru 127.
+// Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
+//
+// ...79ab
+// 0111 1001 1010 1011
+// 44
+// 89
+//
+// So if we shift off the rightmost 14 bits, then (shift back only
+// the top half) we get
+//
+// 0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
+//
+// Put the right 64-bit significand in an FR register, convert to double;
+// it is exact. Put the next 128 bits into a quad register and round to double.
+// The true exponent of the low part is -51.
+//
+// hi is 0 fffe b17217f7d1cf4000
+// lo is 0 ffcc e6af278ece601000
+//
+// Convert to double memory format and get
+//
+// hi is 0x3fe62e42fefa39e8
+// lo is 0x3cccd5e4f1d9cc02
+//
+// log2_hi + log2_lo is an accurate value for log2.
+//
+//
+// The T and t values
+// ==================
+// A similar method is used to generate the T and t values.
+//
+// K * log2_hi + T must be exact.
+//
+// Smallest T,t
+// ----------
+// The smallest T,t is
+// T t
+// data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
+//
+// The exponent is 0x3f6 (biased) or -9 (true).
+// For the smallest T value, what we want is to clip the significand such that
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is specific
+// to the first entry. In general, it is 0xffff - (biased 15-bit exponent).
+
+// Independently, what we have calculated is the table value as a quad precision number.
+// Table entry 1 is
+// 0 fff6 80200aaeac44ef38 338f77605fdf8000
+//
+// We store this quad precision number in a data structure that is
+// sign: 1
+// exponent: 15
+// significand_hi: 64 (includes explicit bit)
+// significand_lo: 49
+// Because the explicit bit is included, the significand is 113 bits.
+//
+// Consider significand_hi for table entry 1.
+//
+//
+// +-+--- ... -------+--------------------+
+// | |
+// +-+--- ... -------+--------------------+
+// 0 1 4444444455555555556666
+// 2345678901234567890123
+//
+// Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
+// Bit 42 is 2^-42. If we shift to the right by 9, the bit in
+// bit 42 goes in 51.
+//
+// So what we want to do is shift bits 43 thru 63 into significand_lo.
+// This is shifting bit 42 into bit 63, taking care to retain the shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//
+// The shift_value is 63-42 = 21. In general, this is
+// 63 - (51 -(0xffff - 0xfff6))
+// For this example, it is
+// 63 - (51 - 9) = 63 - 42 = 21
+//
+// This means we are shifting 21 bits into significand_lo. We must maintain more
+// than a 128-bit significand not to lose bits. So before the shift we put the 128-bit
+// significand into a 256-bit significand and then shift.
+// The 256-bit significand has four parts: hh, hl, lh, and ll.
+//
+// Start off with
+// hh hl lh ll
+// <64> <49><15_0> <64_0> <64_0>
+//
+// After shift by 21 (then return for significand_hi),
+// <43><21_0> <21><43> <6><58_0> <64_0>
+//
+// Take the hh part and convert to a double. There is no rounding here.
+// The conversion is exact. The true exponent of the high part is the same as the
+// true exponent of the input quad.
+//
+// We have some 64 plus significand bits for the low part. In this example, we have
+// 70 bits. We want to round this to a double. Put them in a quad and then do a quad fnorm.
+// For this example the true exponent of the low part is
+// true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
+// In general, this is
+// true_exponent_of_high - (64 - shift_value)
+//
+//
+// Largest T,t
+// ----------
+// The largest T,t is
+// data8 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))= +6.92171e-001
+//
+// Table entry 256 is
+// 0 fffe b1321ff67cba178c 51da12f4df5a0000
+//
+// The shift value is
+// 63 - (51 -(0xffff - 0xfffe)) = 13
+//
+// The true exponent of the low part is
+// true_exponent_of_high - (64 - shift_value)
+// -1 - (64-13) = -52
+// Biased as a double, this is 0x3cb
+//
+//
+//
+// So then lsb(T) must be >= 2^-51
+// msb(Klog2_hi) <= 2^12
+//
+// +--------+---------+
+// | 51 bits | <== largest T
+// +--------+---------+
+// | 9 bits | 42 bits | <== smallest T
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+
+
+
+// Special Cases
+//==============================================================
+
+// double float
+// overflow error 24 30
+
+// underflow error 25 31
+
+// X zero Y zero
+// +0 +0 +1 error 26 32
+// -0 +0 +1 error 26 32
+// +0 -0 +1 error 26 32
+// -0 -0 +1 error 26 32
+
+// X zero Y negative
+// +0 -odd integer +inf error 27 33 divide-by-zero
+// -0 -odd integer -inf error 27 33 divide-by-zero
+// +0 !-odd integer +inf error 27 33 divide-by-zero
+// -0 !-odd integer +inf error 27 33 divide-by-zero
+// +0 -inf +inf error 27 33 divide-by-zero
+// -0 -inf +inf error 27 33 divide-by-zero
+
+// X zero Y positive
+// +0 +odd integer +0
+// -0 +odd integer -0
+// +0 !+odd integer +0
+// -0 !+odd integer +0
+// +0 +inf +0
+// -0 +inf +0
+// +0 Y NaN quiet Y invalid if Y SNaN
+// -0 Y NaN quiet Y invalid if Y SNaN
+
+// X one
+// -1 Y inf +1
+// -1 Y NaN quiet Y invalid if Y SNaN
+// +1 Y NaN +1 invalid if Y SNaN
+// +1 Y any else +1
+
+// X - Y not integer QNAN error 28 34 invalid
+
+// X NaN Y 0 +1 error 29 35
+// X NaN Y NaN quiet X invalid if X or Y SNaN
+// X NaN Y any else quiet X invalid if X SNaN
+// X !+1 Y NaN quiet Y invalid if Y SNaN
+
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf
+// X -inf Y >0, odd integer -inf
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X +inf Y =0 +1
+// X -inf Y =0 +1
+
+// |X|<1 Y +inf +0
+// |X|<1 Y -inf +inf
+// |X|>1 Y +inf +inf
+// |X|>1 Y -inf +0
+
+// X any Y =0 +1
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+// integer registers used
+
+pow_AD_Tt = r33
+pow_GR_FFF7 = r34
+pow_GR_exp_Y = r34 // duplicate
+pow_GR_17ones = r35
+
+pow_AD_P = r36
+pow_AD_Q = r37
+pow_AD_tbl1 = r38
+pow_AD_tbl2 = r39
+pow_GR_exp_X = r40
+pow_GR_true_exp_X = r40 // duplicate
+
+pow_GR_offset = r41
+pow_GR_exp_Xm1 = r42
+pow_GR_sig_X = r43
+pow_GR_signexp_X = r44
+
+pow_GR_signexp_Xm1 = r46
+pow_GR_int_W1 = r47
+pow_GR_int_W2 = r48
+pow_GR_int_N = r49
+pow_GR_index1 = r50
+
+pow_GR_index2 = r51
+pow_AD_T1 = r52
+pow_AD_T2 = r53
+pow_GR_gt_ln = r53 // duplicate
+pow_int_GR_M = r54
+pow_GR_10033 = r55
+
+pow_GR_16ones = r56
+pow_GR_sig_int_Y = r57
+pow_GR_sign_Y_Gpr = r58
+pow_GR_17ones_m1 = r59
+pow_GR_one = r60
+pow_GR_sign_Y = r60 // duplicate (shares r60 with pow_GR_one)
+
+pow_GR_signexp_Y_Gpr = r61
+pow_GR_exp_Y_Gpr = r62
+pow_GR_true_exp_Y_Gpr = r63
+pow_GR_signexp_Y = r64
+
+GR_SAVE_B0 = r65
+GR_SAVE_GP = r66
+GR_SAVE_PFS = r67
+
+GR_Parameter_X = r68
+GR_Parameter_Y = r69
+GR_Parameter_RESULT = r70
+pow_GR_tag = r71
+
+
+// floating point registers used
+
+POW_B = f32 // presumably B = frcpa(x) from the overview - confirm against the code body
+POW_NORM_X = f33
+POW_Xm1 = f34
+POW_r1 = f34 // duplicate (shares f34 with POW_Xm1)
+POW_P4 = f35
+
+POW_P5 = f36
+POW_NORM_Y = f37
+POW_Q2 = f38
+POW_Q3 = f39
+POW_P2 = f40
+
+POW_P3 = f41
+POW_P0 = f42
+POW_log2_lo = f43
+POW_r = f44
+POW_Q0_half = f45
+
+POW_Q1 = f46
+POW_log2_hi = f48
+POW_Q4 = f49
+POW_P1 = f50
+
+POW_log2_by_128_hi = f51
+POW_inv_log2_by_128 = f52
+POW_rsq = f53
+POW_Yrcub = f54
+POW_log2_by_128_lo = f55
+
+POW_v6 = f56
+POW_v4 = f58
+POW_v2 = f59
+POW_T = f60
+
+POW_Tt = f61
+POW_RSHF = f62
+POW_v21ps = f63
+POW_s4 = f64
+
+POW_U = f66
+POW_G = f67
+POW_delta = f68
+POW_v3 = f69
+POW_V = f70
+
+POW_p = f71
+POW_Z1 = f72
+POW_e3 = f73
+POW_e2 = f74
+POW_Z2 = f75
+
+POW_e1 = f76
+POW_W1 = f77
+POW_UmZ2 = f78
+POW_W2 = f79
+POW_Z3 = f80
+
+POW_int_W1 = f81
+POW_e12 = f82
+POW_int_W2 = f83
+POW_UmZ2pV = f84
+POW_Z3sq = f85
+
+POW_e123 = f86
+POW_N1float = f87
+POW_N2float = f88
+POW_f3 = f89
+POW_q = f90
+
+POW_s1 = f91
+POW_Nfloat = f92
+POW_s2 = f93
+POW_f2 = f94
+POW_f1 = f95
+
+POW_T1 = f96
+POW_T2 = f97
+POW_2M = f98
+POW_s = f99
+POW_f12 = f100
+
+POW_ssq = f101
+POW_T1T2 = f102
+POW_1ps = f103
+POW_A = f104
+POW_es = f105
+
+POW_int_K = f107
+POW_K = f108
+POW_f123 = f109
+POW_Gpr = f110
+
+POW_Y_Gpr = f111
+POW_int_Y = f112
+
+POW_float_int_Y = f116
+POW_ftz_urm_f8 = f117
+POW_wre_urm_f8 = f118
+POW_abs_A = f119
+POW_gt_pln = f120
+
+POW_xsq = f121
+
+POW_twoV = f122
+POW_Xp1 = f123
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+pow_table_P: // Constants for pow; two-word entries are a 64-bit significand followed by a sign/exponent word
+ASM_TYPE_DIRECTIVE(pow_table_P,@object)
+data8 0x8000F7B249FF332D, 0x0000BFFC // P_5, approx -1/8
+data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3, approx -1/6
+data8 0x80000000000018E5, 0x0000BFFD // P_1, approx -1/4
+data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128, i.e. 128/ln(2)
+// Double-precision constants follow; the Q values are close to 1/2!, 1/3!, ... 1/6!
+// (presumably the coefficients of the exp(s) series from the overview - confirm against the code body)
+data8 0x3FA5555555554A9E // Q_2, approx 1/24
+data8 0x3F8111124F4DD9F9 // Q_3, approx 1/120
+data8 0x3FE0000000000000 // Q_0, exactly 0.5
+data8 0x3FC5555555554733 // Q_1, approx 1/6
+data8 0x3F56C16D9360FFA0 // Q_4, approx 1/720
+data8 0x43e8000000000000 // Right shift constant for exp (1.5 * 2^63)
+data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo
+data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
+data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
+ASM_SIZE_DIRECTIVE(pow_table_P)
+
+pow_table_Q: // Remaining P coefficients plus the log2 constants; the three-entry P rows are a 64-bit significand followed by a sign/exponent word
+ASM_TYPE_DIRECTIVE(pow_table_Q,@object)
+data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4, approx 1/7
+data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2, approx 1/5
+data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0, approx 1/3
+data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001 (two doubles, hi then lo)
+data8 0xb17217f7d1cf79ab, 0x00003ff7 // ln2_by_128_hi (leading 64 significand bits of ln2, exponent lowered by 7)
+ASM_SIZE_DIRECTIVE(pow_table_Q)
+
+
+pow_Tt:
+ASM_TYPE_DIRECTIVE(pow_Tt,@object)
+data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003
+data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003
+data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003
+data8 0x3f8c317384c75f00, 0x3c69806208c04c22 // log(1/frcpa(1+3/256))= +1.37662e-002
+data8 0x3f91a6b91ac73380, 0x3c7874daa716eb32 // log(1/frcpa(1+4/256))= +1.72376e-002
+data8 0x3f95ba9a5d9ac000, 0x3cacbb84e08d78ac // log(1/frcpa(1+5/256))= +2.12196e-002
+data8 0x3f99d2a807432580, 0x3cbcf80538b441e1 // log(1/frcpa(1+6/256))= +2.52177e-002
+data8 0x3f9d6b2725979800, 0x3c6095e5c8f8f359 // log(1/frcpa(1+7/256))= +2.87291e-002
+data8 0x3fa0c58fa19dfa80, 0x3cb4c5d4e9d0dda2 // log(1/frcpa(1+8/256))= +3.27573e-002
+data8 0x3fa2954c78cbce00, 0x3caa932b860ab8d6 // log(1/frcpa(1+9/256))= +3.62953e-002
+data8 0x3fa4a94d2da96c40, 0x3ca670452b76bbd5 // log(1/frcpa(1+10/256))= +4.03542e-002
+data8 0x3fa67c94f2d4bb40, 0x3ca84104f9941798 // log(1/frcpa(1+11/256))= +4.39192e-002
+data8 0x3fa85188b630f040, 0x3cb40a882cbf0153 // log(1/frcpa(1+12/256))= +4.74971e-002
+data8 0x3faa6b8abe73af40, 0x3c988d46e25c9059 // log(1/frcpa(1+13/256))= +5.16017e-002
+data8 0x3fac441e06f72a80, 0x3cae3e930a1a2a96 // log(1/frcpa(1+14/256))= +5.52072e-002
+data8 0x3fae1e6713606d00, 0x3c8a796f6283b580 // log(1/frcpa(1+15/256))= +5.88257e-002
+data8 0x3faffa6911ab9300, 0x3c5193070351e88a // log(1/frcpa(1+16/256))= +6.24574e-002
+data8 0x3fb0ec139c5da600, 0x3c623f2a75eb992d // log(1/frcpa(1+17/256))= +6.61022e-002
+data8 0x3fb1dbd2643d1900, 0x3ca649b2ef8927f0 // log(1/frcpa(1+18/256))= +6.97605e-002
+data8 0x3fb2cc7284fe5f00, 0x3cbc5e86599513e2 // log(1/frcpa(1+19/256))= +7.34321e-002
+data8 0x3fb3bdf5a7d1ee60, 0x3c90bd4bb69dada3 // log(1/frcpa(1+20/256))= +7.71173e-002
+data8 0x3fb4b05d7aa012e0, 0x3c54e377c9b8a54f // log(1/frcpa(1+21/256))= +8.08161e-002
+data8 0x3fb580db7ceb5700, 0x3c7fdb2f98354cde // log(1/frcpa(1+22/256))= +8.39975e-002
+data8 0x3fb674f089365a60, 0x3cb9994c9d3301c1 // log(1/frcpa(1+23/256))= +8.77219e-002
+data8 0x3fb769ef2c6b5680, 0x3caaec639db52a79 // log(1/frcpa(1+24/256))= +9.14602e-002
+data8 0x3fb85fd927506a40, 0x3c9f9f99a3cf8e25 // log(1/frcpa(1+25/256))= +9.52125e-002
+data8 0x3fb9335e5d594980, 0x3ca15c3abd47d99a // log(1/frcpa(1+26/256))= +9.84401e-002
+data8 0x3fba2b0220c8e5e0, 0x3cb4ca639adf6fc3 // log(1/frcpa(1+27/256))= +1.02219e-001
+data8 0x3fbb0004ac1a86a0, 0x3ca7cb81bf959a59 // log(1/frcpa(1+28/256))= +1.05469e-001
+data8 0x3fbbf968769fca00, 0x3cb0c646c121418e // log(1/frcpa(1+29/256))= +1.09274e-001
+data8 0x3fbccfedbfee13a0, 0x3ca0465fce24ab4b // log(1/frcpa(1+30/256))= +1.12548e-001
+data8 0x3fbda727638446a0, 0x3c82803f4e2e6603 // log(1/frcpa(1+31/256))= +1.15832e-001
+data8 0x3fbea3257fe10f60, 0x3cb986a3f2313d1a // log(1/frcpa(1+32/256))= +1.19677e-001
+data8 0x3fbf7be9fedbfde0, 0x3c97d16a6a621cf4 // log(1/frcpa(1+33/256))= +1.22985e-001
+data8 0x3fc02ab352ff25f0, 0x3c9cc6baad365600 // log(1/frcpa(1+34/256))= +1.26303e-001
+data8 0x3fc097ce579d2040, 0x3cb9ba16d329440b // log(1/frcpa(1+35/256))= +1.29633e-001
+data8 0x3fc1178e8227e470, 0x3cb7bc671683f8e6 // log(1/frcpa(1+36/256))= +1.33531e-001
+data8 0x3fc185747dbecf30, 0x3c9d1116f66d2345 // log(1/frcpa(1+37/256))= +1.36885e-001
+data8 0x3fc1f3b925f25d40, 0x3c8162c9ef939ac6 // log(1/frcpa(1+38/256))= +1.40250e-001
+data8 0x3fc2625d1e6ddf50, 0x3caad3a1ec384fc3 // log(1/frcpa(1+39/256))= +1.43627e-001
+data8 0x3fc2d1610c868130, 0x3cb3ad997036941b // log(1/frcpa(1+40/256))= +1.47015e-001
+data8 0x3fc340c597411420, 0x3cbc2308262c7998 // log(1/frcpa(1+41/256))= +1.50414e-001
+data8 0x3fc3b08b6757f2a0, 0x3cb2170d6cdf0526 // log(1/frcpa(1+42/256))= +1.53825e-001
+data8 0x3fc40dfb08378000, 0x3c9bb453c4f7b685 // log(1/frcpa(1+43/256))= +1.56677e-001
+data8 0x3fc47e74e8ca5f70, 0x3cb836a48fdfce9d // log(1/frcpa(1+44/256))= +1.60109e-001
+data8 0x3fc4ef51f6466de0, 0x3ca07a43919aa64b // log(1/frcpa(1+45/256))= +1.63553e-001
+data8 0x3fc56092e02ba510, 0x3ca85006899d97b0 // log(1/frcpa(1+46/256))= +1.67010e-001
+data8 0x3fc5d23857cd74d0, 0x3ca30a5ba6e7abbe // log(1/frcpa(1+47/256))= +1.70478e-001
+data8 0x3fc6313a37335d70, 0x3ca905586f0ac97e // log(1/frcpa(1+48/256))= +1.73377e-001
+data8 0x3fc6a399dabbd380, 0x3c9b2c6657a96684 // log(1/frcpa(1+49/256))= +1.76868e-001
+data8 0x3fc70337dd3ce410, 0x3cb50bc52f55cdd8 // log(1/frcpa(1+50/256))= +1.79786e-001
+data8 0x3fc77654128f6120, 0x3cad2eb7c9a39efe // log(1/frcpa(1+51/256))= +1.83299e-001
+data8 0x3fc7e9d82a0b0220, 0x3cba127e90393c01 // log(1/frcpa(1+52/256))= +1.86824e-001
+data8 0x3fc84a6b759f5120, 0x3cbd7fd52079f706 // log(1/frcpa(1+53/256))= +1.89771e-001
+data8 0x3fc8ab47d5f5a300, 0x3cbfae141751a3de // log(1/frcpa(1+54/256))= +1.92727e-001
+data8 0x3fc91fe490965810, 0x3cb69cf30a1c319e // log(1/frcpa(1+55/256))= +1.96286e-001
+data8 0x3fc981634011aa70, 0x3ca5bb3d208bc42a // log(1/frcpa(1+56/256))= +1.99261e-001
+data8 0x3fc9f6c407089660, 0x3ca04d68658179a0 // log(1/frcpa(1+57/256))= +2.02843e-001
+data8 0x3fca58e729348f40, 0x3c99f5411546c286 // log(1/frcpa(1+58/256))= +2.05838e-001
+data8 0x3fcabb55c31693a0, 0x3cb9a5350eb327d5 // log(1/frcpa(1+59/256))= +2.08842e-001
+data8 0x3fcb1e104919efd0, 0x3c18965fcce7c406 // log(1/frcpa(1+60/256))= +2.11855e-001
+data8 0x3fcb94ee93e367c0, 0x3cb503716da45184 // log(1/frcpa(1+61/256))= +2.15483e-001
+data8 0x3fcbf851c0675550, 0x3cbdf1b3f7ab5378 // log(1/frcpa(1+62/256))= +2.18516e-001
+data8 0x3fcc5c0254bf23a0, 0x3ca7aab9ed0b1d7b // log(1/frcpa(1+63/256))= +2.21558e-001
+data8 0x3fccc000c9db3c50, 0x3c92a7a2a850072a // log(1/frcpa(1+64/256))= +2.24609e-001
+data8 0x3fcd244d99c85670, 0x3c9f6019120edf4c // log(1/frcpa(1+65/256))= +2.27670e-001
+data8 0x3fcd88e93fb2f450, 0x3c6affb96815e081 // log(1/frcpa(1+66/256))= +2.30741e-001
+data8 0x3fcdedd437eaef00, 0x3c72553595897976 // log(1/frcpa(1+67/256))= +2.33820e-001
+data8 0x3fce530effe71010, 0x3c90913b020fa182 // log(1/frcpa(1+68/256))= +2.36910e-001
+data8 0x3fceb89a1648b970, 0x3c837ba4045bfd25 // log(1/frcpa(1+69/256))= +2.40009e-001
+data8 0x3fcf1e75fadf9bd0, 0x3cbcea6d13e0498d // log(1/frcpa(1+70/256))= +2.43117e-001
+data8 0x3fcf84a32ead7c30, 0x3ca5e3a67b3c6d77 // log(1/frcpa(1+71/256))= +2.46235e-001
+data8 0x3fcfeb2233ea07c0, 0x3cba0c6f0049c5a6 // log(1/frcpa(1+72/256))= +2.49363e-001
+data8 0x3fd028f9c7035c18, 0x3cb0a30b06677ff6 // log(1/frcpa(1+73/256))= +2.52501e-001
+data8 0x3fd05c8be0d96358, 0x3ca0f1c77ccb5865 // log(1/frcpa(1+74/256))= +2.55649e-001
+data8 0x3fd085eb8f8ae790, 0x3cbd513f45fe7a97 // log(1/frcpa(1+75/256))= +2.58174e-001
+data8 0x3fd0b9c8e32d1910, 0x3c927449047ca006 // log(1/frcpa(1+76/256))= +2.61339e-001
+data8 0x3fd0edd060b78080, 0x3c89b52d8435f53e // log(1/frcpa(1+77/256))= +2.64515e-001
+data8 0x3fd122024cf00638, 0x3cbdd976fabda4bd // log(1/frcpa(1+78/256))= +2.67701e-001
+data8 0x3fd14be2927aecd0, 0x3cb02f90ad0bc471 // log(1/frcpa(1+79/256))= +2.70257e-001
+data8 0x3fd180618ef18ad8, 0x3cbd003792c71a98 // log(1/frcpa(1+80/256))= +2.73461e-001
+data8 0x3fd1b50bbe2fc638, 0x3ca9ae64c6403ead // log(1/frcpa(1+81/256))= +2.76675e-001
+data8 0x3fd1df4cc7cf2428, 0x3cb43f0455f7e395 // log(1/frcpa(1+82/256))= +2.79254e-001
+data8 0x3fd214456d0eb8d0, 0x3cb0fbd748d75d30 // log(1/frcpa(1+83/256))= +2.82487e-001
+data8 0x3fd23ec5991eba48, 0x3c906edd746b77e2 // log(1/frcpa(1+84/256))= +2.85081e-001
+data8 0x3fd2740d9f870af8, 0x3ca9802e6a00a670 // log(1/frcpa(1+85/256))= +2.88333e-001
+data8 0x3fd29ecdabcdfa00, 0x3cacecef70890cfa // log(1/frcpa(1+86/256))= +2.90943e-001
+data8 0x3fd2d46602adcce8, 0x3cb97911955f3521 // log(1/frcpa(1+87/256))= +2.94214e-001
+data8 0x3fd2ff66b04ea9d0, 0x3cb12dabe191d1c9 // log(1/frcpa(1+88/256))= +2.96838e-001
+data8 0x3fd335504b355a30, 0x3cbdf9139df924ec // log(1/frcpa(1+89/256))= +3.00129e-001
+data8 0x3fd360925ec44f58, 0x3cb253e68977a1e3 // log(1/frcpa(1+90/256))= +3.02769e-001
+data8 0x3fd38bf1c3337e70, 0x3cb3d283d2a2da21 // log(1/frcpa(1+91/256))= +3.05417e-001
+data8 0x3fd3c25277333180, 0x3cadaa5b035eae27 // log(1/frcpa(1+92/256))= +3.08735e-001
+data8 0x3fd3edf463c16838, 0x3cb983d680d3c108 // log(1/frcpa(1+93/256))= +3.11399e-001
+data8 0x3fd419b423d5e8c0, 0x3cbc86dd921c139d // log(1/frcpa(1+94/256))= +3.14069e-001
+data8 0x3fd44591e0539f48, 0x3c86a76d6dc2782e // log(1/frcpa(1+95/256))= +3.16746e-001
+data8 0x3fd47c9175b6f0a8, 0x3cb59a2e013c6b5f // log(1/frcpa(1+96/256))= +3.20103e-001
+data8 0x3fd4a8b341552b08, 0x3c93f1e86e468694 // log(1/frcpa(1+97/256))= +3.22797e-001
+data8 0x3fd4d4f390890198, 0x3cbf5e4ea7c5105a // log(1/frcpa(1+98/256))= +3.25498e-001
+data8 0x3fd501528da1f960, 0x3cbf58da53e9ad10 // log(1/frcpa(1+99/256))= +3.28206e-001
+data8 0x3fd52dd06347d4f0, 0x3cb98a28cebf6eef // log(1/frcpa(1+100/256))= +3.30921e-001
+data8 0x3fd55a6d3c7b8a88, 0x3c9c76b67c2d1fd4 // log(1/frcpa(1+101/256))= +3.33644e-001
+data8 0x3fd5925d2b112a58, 0x3c9029616a4331b8 // log(1/frcpa(1+102/256))= +3.37058e-001
+data8 0x3fd5bf406b543db0, 0x3c9fb8292ecfc820 // log(1/frcpa(1+103/256))= +3.39798e-001
+data8 0x3fd5ec433d5c35a8, 0x3cb71a1229d17eec // log(1/frcpa(1+104/256))= +3.42545e-001
+data8 0x3fd61965cdb02c18, 0x3cbba94fe1dbb8d2 // log(1/frcpa(1+105/256))= +3.45300e-001
+data8 0x3fd646a84935b2a0, 0x3c9ee496d2c9ae57 // log(1/frcpa(1+106/256))= +3.48063e-001
+data8 0x3fd6740add31de90, 0x3cb1da3a6c7a9dfd // log(1/frcpa(1+107/256))= +3.50833e-001
+data8 0x3fd6a18db74a58c0, 0x3cb494c257add8dc // log(1/frcpa(1+108/256))= +3.53610e-001
+data8 0x3fd6cf31058670e8, 0x3cb0b244a70a8da9 // log(1/frcpa(1+109/256))= +3.56396e-001
+data8 0x3fd6f180e852f0b8, 0x3c9db7aefa866720 // log(1/frcpa(1+110/256))= +3.58490e-001
+data8 0x3fd71f5d71b894e8, 0x3cbe91c4bf324957 // log(1/frcpa(1+111/256))= +3.61289e-001
+data8 0x3fd74d5aefd66d58, 0x3cb06b3d9bfac023 // log(1/frcpa(1+112/256))= +3.64096e-001
+data8 0x3fd77b79922bd378, 0x3cb727d8804491f4 // log(1/frcpa(1+113/256))= +3.66911e-001
+data8 0x3fd7a9b9889f19e0, 0x3ca2ef22df5bc543 // log(1/frcpa(1+114/256))= +3.69734e-001
+data8 0x3fd7d81b037eb6a0, 0x3cb8fd3ba07a7ece // log(1/frcpa(1+115/256))= +3.72565e-001
+data8 0x3fd8069e33827230, 0x3c8bd1e25866e61a // log(1/frcpa(1+116/256))= +3.75404e-001
+data8 0x3fd82996d3ef8bc8, 0x3ca5aab9f5928928 // log(1/frcpa(1+117/256))= +3.77538e-001
+data8 0x3fd85855776dcbf8, 0x3ca56f33337789d6 // log(1/frcpa(1+118/256))= +3.80391e-001
+data8 0x3fd8873658327cc8, 0x3cbb8ef0401db49d // log(1/frcpa(1+119/256))= +3.83253e-001
+data8 0x3fd8aa75973ab8c8, 0x3cbb9961f509a680 // log(1/frcpa(1+120/256))= +3.85404e-001
+data8 0x3fd8d992dc8824e0, 0x3cb220512a53732d // log(1/frcpa(1+121/256))= +3.88280e-001
+data8 0x3fd908d2ea7d9510, 0x3c985f0e513bfb5c // log(1/frcpa(1+122/256))= +3.91164e-001
+data8 0x3fd92c59e79c0e50, 0x3cb82e073fd30d63 // log(1/frcpa(1+123/256))= +3.93332e-001
+data8 0x3fd95bd750ee3ed0, 0x3ca4aa7cdb6dd8a8 // log(1/frcpa(1+124/256))= +3.96231e-001
+data8 0x3fd98b7811a3ee58, 0x3caa93a5b660893e // log(1/frcpa(1+125/256))= +3.99138e-001
+data8 0x3fd9af47f33d4068, 0x3cac294b3b3190ba // log(1/frcpa(1+126/256))= +4.01323e-001
+data8 0x3fd9df270c1914a0, 0x3cbe1a58fd0cd67e // log(1/frcpa(1+127/256))= +4.04245e-001
+data8 0x3fda0325ed14fda0, 0x3cb1efa7950fb57e // log(1/frcpa(1+128/256))= +4.06442e-001
+data8 0x3fda33440224fa78, 0x3c8915fe75e7d477 // log(1/frcpa(1+129/256))= +4.09379e-001
+data8 0x3fda57725e80c380, 0x3ca72bd1062b1b7f // log(1/frcpa(1+130/256))= +4.11587e-001
+data8 0x3fda87d0165dd198, 0x3c91f7845f58dbad // log(1/frcpa(1+131/256))= +4.14539e-001
+data8 0x3fdaac2e6c03f890, 0x3cb6f237a911c509 // log(1/frcpa(1+132/256))= +4.16759e-001
+data8 0x3fdadccc6fdf6a80, 0x3c90ddc4b7687169 // log(1/frcpa(1+133/256))= +4.19726e-001
+data8 0x3fdb015b3eb1e790, 0x3c692dd7d90e1e8e // log(1/frcpa(1+134/256))= +4.21958e-001
+data8 0x3fdb323a3a635948, 0x3c6f85655cbe14de // log(1/frcpa(1+135/256))= +4.24941e-001
+data8 0x3fdb56fa04462908, 0x3c95252d841994de // log(1/frcpa(1+136/256))= +4.27184e-001
+data8 0x3fdb881aa659bc90, 0x3caa53a745a3642f // log(1/frcpa(1+137/256))= +4.30182e-001
+data8 0x3fdbad0bef3db160, 0x3cb32f2540dcc16a // log(1/frcpa(1+138/256))= +4.32437e-001
+data8 0x3fdbd21297781c28, 0x3cbd8e891e106f1d // log(1/frcpa(1+139/256))= +4.34697e-001
+data8 0x3fdc039236f08818, 0x3c809435af522ba7 // log(1/frcpa(1+140/256))= +4.37718e-001
+data8 0x3fdc28cb1e4d32f8, 0x3cb3944752fbd81e // log(1/frcpa(1+141/256))= +4.39990e-001
+data8 0x3fdc4e19b84723c0, 0x3c9a465260cd3fe5 // log(1/frcpa(1+142/256))= +4.42267e-001
+data8 0x3fdc7ff9c74554c8, 0x3c92447d5b6ca369 // log(1/frcpa(1+143/256))= +4.45311e-001
+data8 0x3fdca57b64e9db00, 0x3cb44344a8a00c82 // log(1/frcpa(1+144/256))= +4.47600e-001
+data8 0x3fdccb130a5ceba8, 0x3cbefaddfb97b73f // log(1/frcpa(1+145/256))= +4.49895e-001
+data8 0x3fdcf0c0d18f3268, 0x3cbd3e7bfee57898 // log(1/frcpa(1+146/256))= +4.52194e-001
+data8 0x3fdd232075b5a200, 0x3c9222599987447c // log(1/frcpa(1+147/256))= +4.55269e-001
+data8 0x3fdd490246defa68, 0x3cabafe9a767a80d // log(1/frcpa(1+148/256))= +4.57581e-001
+data8 0x3fdd6efa918d25c8, 0x3cb58a2624e1c6fd // log(1/frcpa(1+149/256))= +4.59899e-001
+data8 0x3fdd9509707ae528, 0x3cbdc3babce578e7 // log(1/frcpa(1+150/256))= +4.62221e-001
+data8 0x3fddbb2efe92c550, 0x3cb0ac0943c434a4 // log(1/frcpa(1+151/256))= +4.64550e-001
+data8 0x3fddee2f3445e4a8, 0x3cbba9d07ce820e8 // log(1/frcpa(1+152/256))= +4.67663e-001
+data8 0x3fde148a1a2726c8, 0x3cb6537e3375b205 // log(1/frcpa(1+153/256))= +4.70004e-001
+data8 0x3fde3afc0a49ff38, 0x3cbfed5518dbc20e // log(1/frcpa(1+154/256))= +4.72350e-001
+data8 0x3fde6185206d5168, 0x3cb6572601f73d5c // log(1/frcpa(1+155/256))= +4.74702e-001
+data8 0x3fde882578823d50, 0x3c9b24abd4584d1a // log(1/frcpa(1+156/256))= +4.77060e-001
+data8 0x3fdeaedd2eac9908, 0x3cb0ceb5e4d2c8f7 // log(1/frcpa(1+157/256))= +4.79423e-001
+data8 0x3fded5ac5f436be0, 0x3ca72f21f1f5238e // log(1/frcpa(1+158/256))= +4.81792e-001
+data8 0x3fdefc9326d16ab8, 0x3c85081a1639a45c // log(1/frcpa(1+159/256))= +4.84166e-001
+data8 0x3fdf2391a21575f8, 0x3cbf11015bdd297a // log(1/frcpa(1+160/256))= +4.86546e-001
+data8 0x3fdf4aa7ee031928, 0x3cb3795bc052a2d1 // log(1/frcpa(1+161/256))= +4.88932e-001
+data8 0x3fdf71d627c30bb0, 0x3c35c61f0f5a88f3 // log(1/frcpa(1+162/256))= +4.91323e-001
+data8 0x3fdf991c6cb3b378, 0x3c97d99419be6028 // log(1/frcpa(1+163/256))= +4.93720e-001
+data8 0x3fdfc07ada69a908, 0x3cbfe9341ded70b1 // log(1/frcpa(1+164/256))= +4.96123e-001
+data8 0x3fdfe7f18eb03d38, 0x3cb85718a640c33f // log(1/frcpa(1+165/256))= +4.98532e-001
+data8 0x3fe007c053c5002c, 0x3cb3addc9c065f09 // log(1/frcpa(1+166/256))= +5.00946e-001
+data8 0x3fe01b942198a5a0, 0x3c9d5aa4c77da6ac // log(1/frcpa(1+167/256))= +5.03367e-001
+data8 0x3fe02f74400c64e8, 0x3cb5a0ee4450ef52 // log(1/frcpa(1+168/256))= +5.05793e-001
+data8 0x3fe04360be7603ac, 0x3c9dd00c35630fe0 // log(1/frcpa(1+169/256))= +5.08225e-001
+data8 0x3fe05759ac47fe30, 0x3cbd063e1f0bd82c // log(1/frcpa(1+170/256))= +5.10663e-001
+data8 0x3fe06b5f1911cf50, 0x3cae8da674af5289 // log(1/frcpa(1+171/256))= +5.13107e-001
+data8 0x3fe078bf0533c568, 0x3c62241edf5fd1f7 // log(1/frcpa(1+172/256))= +5.14740e-001
+data8 0x3fe08cd9687e7b0c, 0x3cb3007febcca227 // log(1/frcpa(1+173/256))= +5.17194e-001
+data8 0x3fe0a10074cf9018, 0x3ca496e84603816b // log(1/frcpa(1+174/256))= +5.19654e-001
+data8 0x3fe0b5343a234474, 0x3cb46098d14fc90a // log(1/frcpa(1+175/256))= +5.22120e-001
+data8 0x3fe0c974c89431cc, 0x3cac0a7cdcbb86c6 // log(1/frcpa(1+176/256))= +5.24592e-001
+data8 0x3fe0ddc2305b9884, 0x3cb2f753210410ff // log(1/frcpa(1+177/256))= +5.27070e-001
+data8 0x3fe0eb524bafc918, 0x3c88affd6682229e // log(1/frcpa(1+178/256))= +5.28726e-001
+data8 0x3fe0ffb54213a474, 0x3cadeefbab9af993 // log(1/frcpa(1+179/256))= +5.31214e-001
+data8 0x3fe114253da97d9c, 0x3cbaf1c2b8bc160a // log(1/frcpa(1+180/256))= +5.33709e-001
+data8 0x3fe128a24f1d9afc, 0x3cb9cf4df375e650 // log(1/frcpa(1+181/256))= +5.36210e-001
+data8 0x3fe1365252bf0864, 0x3c985a621d4be111 // log(1/frcpa(1+182/256))= +5.37881e-001
+data8 0x3fe14ae558b4a92c, 0x3ca104c4aa8977d1 // log(1/frcpa(1+183/256))= +5.40393e-001
+data8 0x3fe15f85a19c7658, 0x3cbadf26e540f375 // log(1/frcpa(1+184/256))= +5.42910e-001
+data8 0x3fe16d4d38c119f8, 0x3cb3aea11caec416 // log(1/frcpa(1+185/256))= +5.44592e-001
+data8 0x3fe18203c20dd130, 0x3cba82d1211d1d6d // log(1/frcpa(1+186/256))= +5.47121e-001
+data8 0x3fe196c7bc4b1f38, 0x3cb6267acc4f4f4a // log(1/frcpa(1+187/256))= +5.49656e-001
+data8 0x3fe1a4a738b7a33c, 0x3c858930213c987d // log(1/frcpa(1+188/256))= +5.51349e-001
+data8 0x3fe1b981c0c9653c, 0x3c9bc2a4a30f697b // log(1/frcpa(1+189/256))= +5.53895e-001
+data8 0x3fe1ce69e8bb1068, 0x3cb7ae6199cf2a00 // log(1/frcpa(1+190/256))= +5.56447e-001
+data8 0x3fe1dc619de06944, 0x3c6b50bb38388177 // log(1/frcpa(1+191/256))= +5.58152e-001
+data8 0x3fe1f160a2ad0da0, 0x3cbd05b2778a5e1d // log(1/frcpa(1+192/256))= +5.60715e-001
+data8 0x3fe2066d7740737c, 0x3cb32e828f9c6bd6 // log(1/frcpa(1+193/256))= +5.63285e-001
+data8 0x3fe2147dba47a390, 0x3cbd579851b8b672 // log(1/frcpa(1+194/256))= +5.65001e-001
+data8 0x3fe229a1bc5ebac0, 0x3cbb321be5237ce8 // log(1/frcpa(1+195/256))= +5.67582e-001
+data8 0x3fe237c1841a502c, 0x3cb3b56e0915ea64 // log(1/frcpa(1+196/256))= +5.69306e-001
+data8 0x3fe24cfce6f80d98, 0x3cb34a4d1a422919 // log(1/frcpa(1+197/256))= +5.71898e-001
+data8 0x3fe25b2c55cd5760, 0x3cb237401ea5015e // log(1/frcpa(1+198/256))= +5.73630e-001
+data8 0x3fe2707f4d5f7c40, 0x3c9d30f20acc8341 // log(1/frcpa(1+199/256))= +5.76233e-001
+data8 0x3fe285e0842ca380, 0x3cbc4d866d5f21c0 // log(1/frcpa(1+200/256))= +5.78842e-001
+data8 0x3fe294294708b770, 0x3cb85e14d5dc54fa // log(1/frcpa(1+201/256))= +5.80586e-001
+data8 0x3fe2a9a2670aff0c, 0x3c7e6f8f468bbf91 // log(1/frcpa(1+202/256))= +5.83207e-001
+data8 0x3fe2b7fb2c8d1cc0, 0x3c930ffcf63c8b65 // log(1/frcpa(1+203/256))= +5.84959e-001
+data8 0x3fe2c65a6395f5f4, 0x3ca0afe20b53d2d2 // log(1/frcpa(1+204/256))= +5.86713e-001
+data8 0x3fe2dbf557b0df40, 0x3cb646be1188fbc9 // log(1/frcpa(1+205/256))= +5.89350e-001
+data8 0x3fe2ea64c3f97654, 0x3c96516fa8df33b2 // log(1/frcpa(1+206/256))= +5.91113e-001
+data8 0x3fe3001823684d70, 0x3cb96d64e16d1360 // log(1/frcpa(1+207/256))= +5.93762e-001
+data8 0x3fe30e97e9a8b5cc, 0x3c98ef96bc97cca0 // log(1/frcpa(1+208/256))= +5.95531e-001
+data8 0x3fe32463ebdd34e8, 0x3caef1dc9a56c1bf // log(1/frcpa(1+209/256))= +5.98192e-001
+data8 0x3fe332f4314ad794, 0x3caa4f0ac5d5fa11 // log(1/frcpa(1+210/256))= +5.99970e-001
+data8 0x3fe348d90e7464cc, 0x3cbe7889f0516acd // log(1/frcpa(1+211/256))= +6.02643e-001
+data8 0x3fe35779f8c43d6c, 0x3ca96bbab7245411 // log(1/frcpa(1+212/256))= +6.04428e-001
+data8 0x3fe36621961a6a98, 0x3ca31f32262db9fb // log(1/frcpa(1+213/256))= +6.06217e-001
+data8 0x3fe37c299f3c3668, 0x3cb15c72c107ee29 // log(1/frcpa(1+214/256))= +6.08907e-001
+data8 0x3fe38ae2171976e4, 0x3cba42a2554b2dd4 // log(1/frcpa(1+215/256))= +6.10704e-001
+data8 0x3fe399a157a603e4, 0x3cb99c62286d8919 // log(1/frcpa(1+216/256))= +6.12504e-001
+data8 0x3fe3afccfe77b9d0, 0x3ca11048f96a43bd // log(1/frcpa(1+217/256))= +6.15210e-001
+data8 0x3fe3be9d503533b4, 0x3ca4022f47588c3e // log(1/frcpa(1+218/256))= +6.17018e-001
+data8 0x3fe3cd7480b4a8a0, 0x3cb4ba7afc2dc56a // log(1/frcpa(1+219/256))= +6.18830e-001
+data8 0x3fe3e3c43918f76c, 0x3c859673d064b8ba // log(1/frcpa(1+220/256))= +6.21554e-001
+data8 0x3fe3f2acb27ed6c4, 0x3cb55c6b452a16a8 // log(1/frcpa(1+221/256))= +6.23373e-001
+data8 0x3fe4019c2125ca90, 0x3cb8c367879c5a31 // log(1/frcpa(1+222/256))= +6.25197e-001
+data8 0x3fe4181061389720, 0x3cb2c17a79c5cc6c // log(1/frcpa(1+223/256))= +6.27937e-001
+data8 0x3fe42711518df544, 0x3ca5f38d47012fc5 // log(1/frcpa(1+224/256))= +6.29769e-001
+data8 0x3fe436194e12b6bc, 0x3cb9854d65a9b426 // log(1/frcpa(1+225/256))= +6.31604e-001
+data8 0x3fe445285d68ea68, 0x3ca3ff9b3a81cd81 // log(1/frcpa(1+226/256))= +6.33442e-001
+data8 0x3fe45bcc464c8938, 0x3cb0a2d8011a6c05 // log(1/frcpa(1+227/256))= +6.36206e-001
+data8 0x3fe46aed21f117fc, 0x3c8a2be41f8e9f3d // log(1/frcpa(1+228/256))= +6.38053e-001
+data8 0x3fe47a1527e8a2d0, 0x3cba4a83594fab09 // log(1/frcpa(1+229/256))= +6.39903e-001
+data8 0x3fe489445efffcc8, 0x3cbf306a23dcbcde // log(1/frcpa(1+230/256))= +6.41756e-001
+data8 0x3fe4a018bcb69834, 0x3ca46c9285029fd1 // log(1/frcpa(1+231/256))= +6.44543e-001
+data8 0x3fe4af5a0c9d65d4, 0x3cbbc1db897580e3 // log(1/frcpa(1+232/256))= +6.46405e-001
+data8 0x3fe4bea2a5bdbe84, 0x3cb84d880d7ef775 // log(1/frcpa(1+233/256))= +6.48271e-001
+data8 0x3fe4cdf28f10ac44, 0x3cb3ec4b7893ce1f // log(1/frcpa(1+234/256))= +6.50140e-001
+data8 0x3fe4dd49cf994058, 0x3c897224d59d3408 // log(1/frcpa(1+235/256))= +6.52013e-001
+data8 0x3fe4eca86e64a680, 0x3cbccf620f24f0cd // log(1/frcpa(1+236/256))= +6.53889e-001
+data8 0x3fe503c43cd8eb68, 0x3c3f872c65971084 // log(1/frcpa(1+237/256))= +6.56710e-001
+data8 0x3fe513356667fc54, 0x3cb9ca64cc3d52c8 // log(1/frcpa(1+238/256))= +6.58595e-001
+data8 0x3fe522ae0738a3d4, 0x3cbe708164c75968 // log(1/frcpa(1+239/256))= +6.60483e-001
+data8 0x3fe5322e26867854, 0x3cb9988ba4aea615 // log(1/frcpa(1+240/256))= +6.62376e-001
+data8 0x3fe541b5cb979808, 0x3ca1662e3a6b95f5 // log(1/frcpa(1+241/256))= +6.64271e-001
+data8 0x3fe55144fdbcbd60, 0x3cb3acd4ca45c1e0 // log(1/frcpa(1+242/256))= +6.66171e-001
+data8 0x3fe560dbc45153c4, 0x3cb4988947959fed // log(1/frcpa(1+243/256))= +6.68074e-001
+data8 0x3fe5707a26bb8c64, 0x3cb3017fe6607ba9 // log(1/frcpa(1+244/256))= +6.69980e-001
+data8 0x3fe587f60ed5b8fc, 0x3cbe7a3266366ed4 // log(1/frcpa(1+245/256))= +6.72847e-001
+data8 0x3fe597a7977c8f30, 0x3ca1e12b9959a90e // log(1/frcpa(1+246/256))= +6.74763e-001
+data8 0x3fe5a760d634bb88, 0x3cb7c365e53d9602 // log(1/frcpa(1+247/256))= +6.76682e-001
+data8 0x3fe5b721d295f10c, 0x3cb716c2551ccbf0 // log(1/frcpa(1+248/256))= +6.78605e-001
+data8 0x3fe5c6ea94431ef8, 0x3ca02b2ed0e28261 // log(1/frcpa(1+249/256))= +6.80532e-001
+data8 0x3fe5d6bb22ea86f4, 0x3caf43a8bbb2f974 // log(1/frcpa(1+250/256))= +6.82462e-001
+data8 0x3fe5e6938645d38c, 0x3cbcedc98821b333 // log(1/frcpa(1+251/256))= +6.84397e-001
+data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.86335e-001
+data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001
+data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001
+data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001
+ASM_SIZE_DIRECTIVE(pow_Tt)
+
+
+// Table 1 is 2^(index_1/128), index_1 = 0..15, in ascending order. Each 16-byte entry is a
+// 64-bit significand plus a sign/exponent word (0x3FFF = 2^0); pow reads entries with ldfe.
+pow_tbl1:
+ASM_TYPE_DIRECTIVE(pow_tbl1,@object)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+ASM_SIZE_DIRECTIVE(pow_tbl1)
+
+
+// Table 2 is 2^(index_2/8), index_2 = 0..7, in ascending order. Each 16-byte entry is a
+// 64-bit significand plus a sign/exponent word (0x3FFF = 2^0); pow reads entries with ldfe.
+pow_tbl2:
+ASM_TYPE_DIRECTIVE(pow_tbl2,@object)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+ASM_SIZE_DIRECTIVE(pow_tbl2)
+
+.global pow
+
+.section .text
+.proc pow
+.align 32
+
+pow:
+
+{ .mfi
+ alloc r32=ar.pfs,1,35,4,0
+ fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
+ mov pow_GR_17ones = 0x1FFFF
+}
+{ .mfi
+(p0) addl pow_AD_P = @ltoff(pow_table_P), gp
+ fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
+ nop.i 999
+;;
+}
+
+
+// Get exponent of x. Will be used to calculate K.
+{ .mfi
+ getf.exp pow_GR_signexp_X = f8
+ frcpa.s1 POW_B, p6 = f1,f8
+ nop.i 999
+}
+{ .mfi
+ ld8 pow_AD_P = [pow_AD_P]
+ fma.s1 POW_NORM_X = f8,f1,f0
+ mov pow_GR_FFF7 = 0xFFF7
+}
+;;
+
+
+
+// Get significand of x. Will be used to get index to fetch T, Tt.
+// p13 = TRUE ==> X is unorm
+// DOUBLE 0x10033 exponent limit at which y is an integer
+// SINGLE 0x10016
+{ .mfi
+ getf.sig pow_GR_sig_X = f8
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ addl pow_GR_10033 = 0x10033, r0
+}
+{ .mfi
+ mov pow_GR_16ones = 0xFFFF
+ fma.s1 POW_NORM_Y = f9,f1,f0
+ nop.i 999
+}
+;;
+
+
+// p14 = TRUE ==> X is ZERO
+{ .mfi
+ adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P
+ fclass.m p14,p15 = f8, 0x07
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+}
+{ .mfi
+ adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe POW_P5 = [pow_AD_P], 16
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ shl pow_GR_offset = pow_GR_sig_X, 1
+}
+{ .mib
+ ldfe POW_P4 = [pow_AD_Q], 16
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+(p13) br.cond.spnt L(POW_X_DENORM)
+}
+;;
+
+
+// Continue normal and denormal paths here
+L(POW_COMMON):
+// p11 = TRUE ==> Y is a NAN
+{ .mfi
+ ldfe POW_P3 = [pow_AD_P], 16
+ fclass.m.unc p11,p0 = f9, 0xc3
+ shr.u pow_GR_offset = pow_GR_offset,56
+}
+{ .mfi
+ ldfe POW_P2 = [pow_AD_Q], 16
+ nop.f 999
+ nop.i 999
+}
+;;
+
+
+
+// Compute xsq to decide later if |x|=1
+// p11 = TRUE ==> Y is a NaN
+{ .mfi
+ setf.sig POW_int_K = pow_GR_true_exp_X
+(p15) fms.s1 POW_r = POW_B, POW_NORM_X,f1
+ shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+}
+{ .mfi
+ nop.m 999
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
+ nop.i 999
+}
+;;
+
+
+
+// p12 = TRUE ==> X is ZERO and Y is ZERO
+{ .mfi
+ ldfe POW_P1 = [pow_AD_P], 16
+(p14) fclass.m.unc p12,p0 = f9, 0x07
+ nop.i 999
+}
+{ .mfb
+ ldfe POW_P0 = [pow_AD_Q], 16
+ fma.s1 POW_xsq = POW_NORM_X, POW_NORM_X, f0
+(p11) br.cond.spnt L(POW_Y_NAN)
+}
+;;
+
+
+.pred.rel "mutex",p8,p9
+// Get exponent of |x|-1 to use in comparison to 2^-8
+{ .mmf
+(p8) getf.exp pow_GR_signexp_Xm1 = POW_Xp1
+(p9) getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+}
+;;
+
+
+// p11 = TRUE ==> X is a NAN
+{ .mfi
+ ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
+ fclass.m.unc p11,p0 = f8, 0xc3
+ nop.i 999
+}
+{ .mib
+ ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
+ nop.i 999
+(p12) br.cond.spnt L(POW_X_0_Y_0)
+}
+;;
+
+
+// p14 = TRUE ==> X is zero
+// p15 = TRUE ==> X is zero AND Y is negative
+// p10 = TRUE ==> X is zero AND Y is >= zero
+{ .mfi
+ ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
+(p14) fcmp.lt.unc.s1 p15, p10 = f9,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ nop.f 999
+ and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
+}
+;;
+
+
+// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
+// p12 = TRUE ==> X is a NAN and Y is a zero
+// p13 = TRUE ==> X is a NAN and Y is anything else
+{ .mfi
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+(p11) fclass.m.unc p12,p13 = f9, 0x07
+ cmp.lt.unc p6,p7 = pow_GR_exp_Xm1, pow_GR_FFF7
+}
+{ .mfi
+ ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
+ fma.s1 POW_rsq = POW_r, POW_r,f0
+ nop.i 999
+;;
+}
+
+// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
+{ .mfi
+ ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
+(p6) fma.s1 POW_r = POW_r1, f1, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
+(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
+(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
+ andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
+(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_K = POW_int_K
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p13) fma.d f8 = f8,f1,f0
+(p13) br.ret.spnt b0 // Exit if x nan, y anything but zero
+}
+;;
+
+// p10 = TRUE ==> X is zero AND Y is positive
+// p8 = TRUE ==> X is zero AND Y is outside integer range (treat as even int)
+// return +0
+// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
+{ .mfi
+(p10) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033
+(p6) fmerge.s POW_delta = f0,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 POW_G = f0,f0,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y
+ fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe POW_log2_by_128_lo = [pow_AD_P], 16
+(p6) fma.s1 POW_v2 = POW_P1, POW_r1, POW_P0
+ nop.i 999
+}
+{ .mfi
+ ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
+(p7) fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_float_int_Y = POW_int_Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
+ adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+ adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
+ nop.i 999
+}
+;;
+
+// p11 = TRUE ==> X is NEGATIVE
+// p8 = TRUE ==> X is zero AND Y is outside integer range (treat as even int)
+// return +0
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x1a
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p8) fma.d f8 = f0,f0,f0
+(p8) br.ret.spnt b0
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
+ nop.i 999
+}
+;;
+
+
+// p11 = TRUE ==> X is NEGATIVE
+// p12 = TRUE ==> X is NEGATIVE AND Y already int
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
+(p11) cmp.ge.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
+ nop.i 999
+}
+;;
+
+// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
+// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
+// p7 = TRUE ==> X is zero AND Y is NOT an integer, return +0
+{ .mfi
+ nop.m 999
+(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Gpr = POW_G, f1, POW_r
+ nop.i 999
+}
+;;
+
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
+ nop.i 999
+}
+;;
+
+
+// If x=0 and y>0, test y and flag denormal
+// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
+// p8 = TRUE ==> X is zero AND Y is an odd integer
+// p9 = TRUE ==> X is zero AND Y is an even integer
+{ .mfi
+ nop.m 999
+(p10) fcmp.eq.s0 p15,p0 = f9,f0
+(p6) tbit.nz.unc p8,p9 = pow_GR_sig_int_Y,0
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ nop.i 999
+}
+;;
+
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W1 = POW_Z1, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.d f8 = f0,f0,f0 // Result +0 if x zero and y not integer
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
+(p8) br.ret.spnt b0 // Exit if x zero and y odd integer
+}
+;;
+
+// By subtracting RSHF we get rounded integer POW_N2float
+// p15 = TRUE ==> X_0_Y_NEG
+{ .mfi
+ nop.m 999
+ fms.s1 POW_N2float = POW_W2, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
+(p15) br.cond.spnt L(POW_X_0_Y_NEG)
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
+(p7) br.ret.spnt b0 // Exit if x zero and y not an integer
+}
+;;
+
+
+
+// Extract rounded integer from rightmost significand of POW_W2
+// By subtracting RSHF we get rounded integer POW_N1float
+{ .mfi
+ getf.sig pow_GR_int_W2 = POW_W2
+ fms.s1 POW_N1float = POW_W1, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
+ nop.i 999
+}
+;;
+
+
+
+
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+// p10 = TRUE ==> X is NEG and Y is an int
+// p12 = TRUE ==> X is NEG and Y is not an int
+{ .mfi
+ nop.m 999
+(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.d f8 = f0,f0,f0 // Result +0 if x zero and y even integer
+(p9) br.ret.spnt b0 // Exit if x zero and y even integer
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
+ nop.i 999
+}
+;;
+
+// Extract rounded integer from rightmost significand of POW_W1
+// Test if x inf
+{ .mfi
+ getf.sig pow_GR_int_W1 = POW_W1
+ fclass.m.unc p15,p0 = POW_NORM_X, 0x23
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1
+(p12) br.cond.spnt L(POW_X_NEG_Y_NONINT) // Branch if x neg, y not integer
+}
+;;
+
+// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
+{ .mfi
+ getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
+ fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
+(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+}
+;;
+
+
+{ .mfi
+ add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
+ fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1
+(p15) br.cond.spnt L(POW_X_INF)
+}
+;;
+
+
+// Test x and y and flag denormal
+{ .mfi
+ and pow_GR_index1 = 0x0f, pow_GR_int_N
+ fcmp.eq.s0 p15,p0 = f8,f9
+ shr r2 = pow_GR_int_N, 7
+}
+{ .mfi
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+ nop.f 999
+ and pow_GR_index2 = 0x70, pow_GR_int_N
+}
+;;
+
+
+
+{ .mfi
+ shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
+ sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
+}
+{ .mfi
+ addl pow_int_GR_M = 0xFFFF, r2
+ fma.s1 POW_e12 = POW_e1,f1,POW_e2
+ add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+}
+;;
+
+
+{ .mmi
+ ldfe POW_T1 = [pow_AD_T1],16
+ setf.exp POW_2M = pow_int_GR_M
+ andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+
+{ .mfb
+ ldfe POW_T2 = [pow_AD_T2],16
+ fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
+}
+;;
+
+
+// double: p8 TRUE ==> |Y(G + r)| >= 10
+// single: p8 TRUE ==> |Y(G + r)| >= 7
+
+// double
+// -2^10 -2^9 2^9 2^10
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+// single
+// -2^7 -2^6 2^6 2^7
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+
+
+{ .mfi
+(p0) cmp.le.unc p8,p9 = 10, pow_GR_true_exp_Y_Gpr
+ fma.s1 POW_s = POW_s1, f1, POW_s2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f12 = POW_f1, POW_f2,f0
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.f 999
+(p9) cmp.le.unc p0,p10 = 9, pow_GR_true_exp_Y_Gpr
+}
+;;
+
+
+
+{ .mfb
+ nop.m 999
+ fma.s1 POW_e123 = POW_e12, f1, POW_e3
+(p8) br.cond.spnt L(POW_OVER_UNDER_X_NOT_INF)
+}
+;;
+
+
+{ .mmf
+ fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_1ps = f1,f1,POW_s
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f3 = POW_e123,f1,f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f123 = POW_f12, POW_f3, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_2M, POW_T1T2, f0
+ nop.i 999
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+(p12) fmerge.s POW_f123 = f8,POW_f123 // if x neg, y odd int
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+// fma.s1 POW_es = POW_ssq, POW_v3, POW_v2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_A, POW_f123, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+// fma.s1 POW_es = POW_es, POW_ssq, POW_1ps
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_A, POW_es,f0
+ nop.i 999
+}
+;;
+
+
+
+{ .mfb
+ nop.m 999
+(p10) fma.d f8 = POW_A, POW_q, POW_A
+(p10) br.ret.sptk b0
+}
+;;
+
+
+
+
+
+// POSSIBLE_OVER_UNDER
+// p6 = TRUE ==> Y negative
+
+{ .mfi
+ nop.m 999
+ fmerge.s POW_abs_A = f0, POW_A
+ cmp.eq.unc p0,p6 = pow_GR_sign_Y, r0
+}
+;;
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(POW_POSSIBLE_UNDER)
+}
+;;
+
+// POSSIBLE_OVER
+// We got an answer.
+// overflow is a possibility, not a certainty
+
+
+// We define an overflow when the answer with
+// WRE set
+// user-defined rounding mode
+
+// double
+// Largest double is 7FE (biased double)
+// 7FE - 3FF + FFFF = 103FE
+// Create + largest_double_plus_ulp
+// Create - largest_double_plus_ulp
+// Calculate answer with WRE set.
+
+// single
+// Largest single is FE (biased double)
+// FE - 7F + FFFF = 1007E
+// Create + largest_single_plus_ulp
+// Create - largest_single_plus_ulp
+// Calculate answer with WRE set.
+
+// Cases when answer is ldn+1 are as follows:
+// ldn ldn+1
+// --+----------|----------+------------
+// |
+// +inf +inf -inf
+// RN RN
+// RZ
+
+
+// Put in s2 (td set, wre set)
+{ .mfi
+ mov pow_GR_gt_ln = 0x103ff
+ fsetc.s2 0x7F,0x42
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ setf.exp POW_gt_pln = pow_GR_gt_ln
+ fma.d.s2 POW_wre_urm_f8 = POW_abs_A, POW_q, POW_abs_A
+ nop.i 999 ;;
+}
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+
+// p7 = TRUE ==> yes, we have an overflow
+{ .mfi
+ nop.m 999
+ fcmp.ge.unc.s1 p7, p0 = POW_wre_urm_f8, POW_gt_pln
+ nop.i 999
+}
+;;
+
+
+
+{ .mfb
+(p7) mov pow_GR_tag = 24
+ fma.d f8 = POW_A, POW_q, POW_A
+(p7) br.cond.spnt __libm_error_region
+}
+{ .mfb
+ nop.m 999
+ nop.f 999
+(p0) br.ret.sptk b0
+}
+;;
+
+
+L(POW_POSSIBLE_UNDER):
+// We got an answer. input was < -2^9 but > -2^10 (double)
+// We got an answer. input was < -2^6 but > -2^7 (float)
+// underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+
+// Put in s2 (td set, ftz set)
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x41
+ nop.i 999
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+ fma.d.s2 POW_ftz_urm_f8 = POW_A, POW_q, POW_A
+ nop.i 999
+}
+;;
+
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+
+// p7 = TRUE ==> yes, we have an underflow
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p7, p0 = POW_ftz_urm_f8, f0
+ nop.i 999
+}
+;;
+
+
+
+
+{ .mfb
+(p7) mov pow_GR_tag = 25
+ fma.d f8 = POW_A, POW_q, POW_A
+(p7) br.cond.spnt __libm_error_region
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.ret.sptk b0
+}
+;;
+
+
+L(POW_X_DENORM):
+// Here if x unorm. Use the NORM_X for getf instructions, and the back
+// to normal path
+{ .mfi
+ getf.exp pow_GR_signexp_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.sig pow_GR_sig_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ nop.f 999
+}
+;;
+
+{ .mib
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ shl pow_GR_offset = pow_GR_sig_X, 1
+ br.cond.sptk L(POW_COMMON)
+}
+;;
+
+
+L(POW_X_0_Y_0):
+// When X is +-0 and Y is +-0, IEEE returns 1.0
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 26
+ fma.d f8 = f1,f1,f0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_INF):
+// When X is +-inf and Y is +-, IEEE returns
+
+// overflow
+// X +inf Y +inf +inf
+// X -inf Y +inf +inf
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf <== (-inf)^0.5 = +inf !!
+// X -inf Y >0, odd integer -inf
+
+// underflow
+// X +inf Y -inf +0
+// X -inf Y -inf +0
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X + inf Y=+0 +1
+// X + inf Y=-0 +1
+// X - inf Y=+0 +1
+// X - inf Y=-0 +1
+
+// p13 == Y negative
+// p14 == Y positive
+
+// p6 == Y is a floating point number outside the integer.
+// Hence it is an integer and is even.
+// p13 == (Y negative)
+// return +inf
+// p14 == (Y positive)
+// return +0
+
+
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// p13 == (Y negative)
+// return (sign_of_x)inf
+// p14 == (Y positive)
+// return (sign_of_x)0
+// pxx even
+// p13 == (Y negative)
+// return +inf
+// p14 == (Y positive)
+// return +0
+
+// pxx == Y is not an integer
+// p13 == (Y negative)
+// return +inf
+// p14 == (Y positive)
+// return +0
+//
+
+// If x=inf, test y and flag denormal
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p10,p11 = f9,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.lt p13,p14 = POW_NORM_Y,f0
+ cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fclass.m p12,p0 = f9, 0x23
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fclass.m p15,p0 = f9, 0x07 //@zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p15) fmerge.s f8 = f1,f1
+(p15) br.ret.spnt b0
+}
+;;
+
+
+{ .mfi
+(p13) mov pow_GR_tag = 25
+(p14) frcpa.s1 f8,p10 = f1,f0
+ nop.i 999
+}
+{ .mfb
+(p14) mov pow_GR_tag = 24
+(p13) fma.s1 f8 = f0,f0,f0
+(p12) br.ret.spnt b0
+}
+;;
+
+
+
+{ .mfb
+ nop.m 999
+(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y
+ nop.b 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fmerge.s f8 = POW_NORM_X,f8
+ br.ret.sptk b0
+}
+;;
+
+
+
+L(POW_X_0_Y_NEG):
+// When X is +-0 and Y is negative, IEEE returns
+// X Y answer
+// +0 -odd int +inf
+// -0 -odd int -inf
+
+// +0 !-odd int +inf
+// -0 !-odd int +inf
+
+
+// p6 == Y is a floating point number outside the integer.
+// Hence it is an integer and is even.
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// return (sign_of_x)inf
+// p12 even
+// return +inf
+// p10 == Y is not an integer
+// return +inf
+//
+//
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+;;
+
+
+{ .mfi
+ mov pow_GR_tag = 27
+(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p6) frcpa.s0 f8,p13 = f1, f0
+(p6) br.cond.sptk __libm_error_region
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p10) frcpa.s0 f8,p13 = f1, f0
+(p10) br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+{ .mib
+ nop.m 999
+(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
+ nop.b 999
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s0 f8,p13 = f1,f0
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) frcpa f8,p13 = f1,f8
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_NEG_Y_NONINT):
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 28
+ frcpa f8,p6 = f0,f0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_NAN_Y_0):
+// When X is a NAN and Y is zero, IEEE returns 1.
+// We call error support with this value.
+
+{ .mfi
+ nop.m 0
+ fma.d.s0 f10 = f8,f1,f0
+ nop.i 0
+}
+{ .mfb
+ mov pow_GR_tag = 29
+ fma.d.s0 f8 = f0,f0,f1
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+L(POW_OVER_UNDER_X_NOT_INF):
+
+// p8 is TRUE for overflow
+// p9 is TRUE for underflow
+
+// if y is infinity, we should not over/underflow
+
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p14, p13 = POW_xsq,f1
+ cmp.eq.unc p8,p9 = pow_GR_sign_Y_Gpr, r0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fclass.m.unc p15, p0 = f9, 0x23
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fclass.m.unc p11,p0 = f9, 0x23
+ nop.i 999
+}
+;;
+
+// p15 = TRUE if |x|=1, y=inf, return +1
+{ .mfb
+ nop.m 999
+(p15) fma.d f8 = f1,f1,f0
+(p15) br.ret.spnt b0
+}
+;;
+
+.pred.rel "mutex",p8,p9
+{ .mfb
+(p8) setf.exp f8 = pow_GR_17ones
+(p9) fmerge.s f8 = f0,f0
+(p11) br.ret.sptk b0
+}
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.cond.sptk L(POW_OVER_UNDER_ERROR)
+}
+;;
+
+L(POW_Y_NAN):
+
+// Is x = +1 then result is +1, else result is quiet Y
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fcmp.eq.s0 p6,p0 = f9,f1 // Set invalid, even if x=+1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fma.d f8 = f1,f1,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.d f8 = f9,f8,f0
+ br.ret.sptk b0
+}
+;;
+
+
+L(POW_OVER_UNDER_ERROR):
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = POW_NORM_X,POW_NORM_X
+ nop.i 999
+}
+{ .mfi
+ sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
+ nop.f 999
+ mov pow_GR_one = 0x1
+}
+;;
+
+// overflow
+{ .mmb
+(p8) mov pow_GR_tag = 24
+(p8) setf.exp f11 = pow_GR_17ones_m1
+ nop.b 999
+}
+;;
+
+
+// underflow
+{ .mmi
+(p9) mov pow_GR_tag = 25
+(p9) setf.exp f11 = pow_GR_one
+ nop.i 999
+}
+;;
+
+
+// p12 x is negative and y is an odd integer
+
+
+{ .mfi
+ nop.m 999
+ fma.d f8 = f11, f11, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fmerge.ns f8 = f8, f8
+ nop.i 999
+}
+;;
+
+
+.endp pow
+ASM_SIZE_DIRECTIVE(pow)
+
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+
+
+.proc __libm_error_region
+__libm_error_region:
+// Builds a 64-byte frame holding x, y and the tentative result, calls __libm_error_support, then returns the result slot in f8.
+// Answer is inf for overflow and 0 for underflow. Every branch to this label seen in pow sets pow_GR_tag and f8 first.
+.prologue
+// (1) GR_Parameter_Y = old sp-32 (read before sp moves: same instruction group), then sp = old sp-64
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value (address old sp-32, i.e. new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) From here on sp is the new stack pointer; y goes to sp+32, x will go to sp+16
+{ .mmi
+ stfd [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack; post-increment leaves GR_Parameter_Y = sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store x and the tentative result, leave the three pointers at sp+16 / sp+32 / sp+48, and call
+{ .mib
+ stfd [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack (GR_Parameter_Y still reads as sp+48 here)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to sp+32, the slot holding y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address from sp instead of reusing the pre-call register
+};;
+
+// (4) Reload f8 from the result slot, so any value the callee left there is what gets returned
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+// NOTE(review): pow_GR_tag is not referenced in this routine; presumably the callee reads it as the error code - confirm.
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S
new file mode 100644
index 0000000..1c0ebd8
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_powf.S
@@ -0,0 +1,2309 @@
+.file "powf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/03/00 Added p12 to definite over/under path. With odd power we did not
+// maintain the sign of x in this path.
+// 4/04/00 Unwind support added
+// 4/19/00 pow(+-1,inf) now returns NaN
+// pow(+-val, +-inf) returns 0 or inf, but now does not call error support
+// Added s1 to fcvt.fx because invalid flag was incorrectly set.
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 9/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// and tweaking the critical path
+// 9/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 9/28/00 Updated NaN**0 path
+// 1/20/01 Fixed denormal flag settings.
+// 2/12/01 Improved speed.
+//
+// API
+//==============================================================
+// double pow(double, double)
+// float powf(float, float)
+//
+// Overview of operation
+//==============================================================
+//
+// Three steps...
+// 1. Log(x)
+// 2. y Log(x)
+// 3. exp(y log(x))
+//
+// This means we work with the absolute value of x and merge in the sign later.
+// Log(x) = G + delta + r -rsq/2 + p
+// G,delta depend on the exponent of x and table entries. The table entries are
+// indexed by the exponent of x, called K.
+//
+// The G and delta come out of the reduction; r is the reduced x.
+//
+// B = frcpa(x)
+// xB-1 is small means that B is the approximate inverse of x.
+//
+// Log(x) = Log( (1/B)(Bx) )
+// = Log(1/B) + Log(Bx)
+// = Log(1/B) + Log( 1 + (Bx-1))
+//
+// x = 2^K 1.x_1x_2.....x_52
+// B = frcpa(x) = 2^-K Cm
+// Log(1/B) = Log(1/(2^-K Cm))
+// Log(1/B) = Log((2^K/ Cm))
+// Log(1/B) = K Log(2) + Log(1/Cm)
+//
+// Log(x) = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
+//
+// If you take the significand of x, set the exponent to true 0, then Cm is
+// the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
+// The frcpa table is indexed by 8 bits, the x_1 thru x_8.
+// m = x_1x_2...x_8 is an 8-bit index.
+//
+// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
+//
+// We tabulate as two doubles, T and t, where T + t is the value itself.
+//
+// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo + t) + Log( 1 + (Bx-1))
+// Log(x) = G + delta + Log( 1 + (Bx-1))
+//
+// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
+//
+// Log( 1 + (Bx-1)) = r - rsq/2 + p
+//
+// Then,
+//
+// yLog(x) = yG + y delta + y(r-rsq/2) + yp
+// yLog(x) = Z1 + e3 + Z2 + Z3 + (e1 + e2)
+//
+//
+// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+//
+//
+// exp(Z3) is another series.
+// exp(e1 + e2 + e3) is approximated as f3 = 1 + (e1 + e2 + e3)
+//
+// Z1 (128/log2) = number of log2/128 in Z1 is N1
+// Z2 (128/log2) = number of log2/128 in Z2 is N2
+//
+// s1 = Z1 - N1 log2/128
+// s2 = Z2 - N2 log2/128
+//
+// s = s1 + s2
+// N = N1 + N2
+//
+// exp(Z1 + Z2) = exp(Z)
+// exp(Z) = exp(s) exp(N log2/128)
+//
+// exp(r) = exp(Z - N log2/128)
+//
+// r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
+// = Z - N (log2/128)
+//
+// Z = s+d +N (log2/128)
+//
+// exp(Z) = exp(s) (1+d) exp(N log2/128)
+//
+// N = M 128 + n
+//
+// N log2/128 = M log2 + n log2/128
+//
+// n is 7 binary digits = n_7n_6...n_1
+//
+// n log2/128 = n_7n_6n_5 16 log2/128 + n_4n_3n_2n_1 log2/128
+// n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
+// n log2/128 = I2 log2/8 + I1 log2/128
+//
+// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+//
+// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
+// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
+// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
+//
+// I1, I2 are table indices. Use a series for exp(s).
+// Then get exp(Z)
+//
+// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+// exp(yLog(x)) = exp(Z) exp(Z3) f3
+// exp(yLog(x)) = exp(Z)f3 exp(Z3)
+// exp(yLog(x)) = A exp(Z3)
+//
+// We actually calculate exp(Z3) -1.
+// Then,
+// exp(yLog(x)) = A + A( exp(Z3) -1)
+//
+
+// Table Generation
+//==============================================================
+
+// The log values
+// ==============
+// The operation (K*log2_hi) must be exact. K is the true exponent of x.
+// If we allow gradual underflow (denormals), K can be represented in 12 bits
+// (as a two's complement number). We assume 13 bits as an engineering precaution.
+//
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+// 0 1 66
+// 2 34
+//
+// So we want the lsb(log2_hi) to be 2^-50
+// We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
+//
+// 0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
+//
+// Consider numbering the bits left to right, starting at 0 thru 127.
+// Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
+//
+// ...79ab
+// 0111 1001 1010 1011
+// 44
+// 89
+//
+// So if we shift off the rightmost 14 bits, then (shift back only
+// the top half) we get
+//
+// 0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
+//
+// Put the right 64-bit significand in an FR register, convert to double;
+// it is exact. Put the next 128 bits into a quad register and round to double.
+// The true exponent of the low part is -51.
+//
+// hi is 0 fffe b17217f7d1cf4000
+// lo is 0 ffcc e6af278ece601000
+//
+// Convert to double memory format and get
+//
+// hi is 0x3fe62e42fefa39e8
+// lo is 0x3cccd5e4f1d9cc02
+//
+// log2_hi + log2_lo is an accurate value for log2.
+//
+//
+// The T and t values
+// ==================
+// A similar method is used to generate the T and t values.
+//
+// K * log2_hi + T must be exact.
+//
+// Smallest T,t
+// ----------
+// The smallest T,t is
+// T t
+// data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
+//
+// The exponent is 0x3f6 (biased) or -9 (true).
+// For the smallest T value, what we want is to clip the significand such that
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is specific
+// to the first entry. In general, it is 0xffff - (biased 15-bit exponent).
+
+// Independently, what we have calculated is the table value as a quad precision number.
+// Table entry 1 is
+// 0 fff6 80200aaeac44ef38 338f77605fdf8000
+//
+// We store this quad precision number in a data structure that is
+// sign: 1
+// exponent: 15
+// significand_hi: 64 (includes explicit bit)
+// significand_lo: 49
+// Because the explicit bit is included, the significand is 113 bits.
+//
+// Consider significand_hi for table entry 1.
+//
+//
+// +-+--- ... -------+--------------------+
+// | |
+// +-+--- ... -------+--------------------+
+// 0 1 4444444455555555556666
+// 2345678901234567890123
+//
+// Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
+// Bit 42 is 2^-42. If we shift to the right by 9, the bit in
+// bit 42 goes in 51.
+//
+// So what we want to do is shift bits 43 thru 63 into significand_lo.
+// This is shifting bit 42 into bit 63, taking care to retain the shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//
+// The shift_value is 63-42 = 21. In general, this is
+// 63 - (51 -(0xffff - 0xfff6))
+// For this example, it is
+// 63 - (51 - 9) = 63 - 42 = 21
+//
+// This means we are shifting 21 bits into significand_lo. We must maintain more
+// than a 128-bit significand so as not to lose bits. So before the shift we put the 128-bit
+// significand into a 256-bit significand and then shift.
+// The 256-bit significand has four parts: hh, hl, lh, and ll.
+//
+// Start off with
+// hh hl lh ll
+// <64> <49><15_0> <64_0> <64_0>
+//
+// After shift by 21 (then return for significand_hi),
+// <43><21_0> <21><43> <6><58_0> <64_0>
+//
+// Take the hh part and convert to a double. There is no rounding here.
+// The conversion is exact. The true exponent of the high part is the same as the
+// true exponent of the input quad.
+//
+// We have some 64 plus significand bits for the low part. In this example, we have
+// 70 bits. We want to round this to a double. Put them in a quad and then do a quad fnorm.
+// For this example the true exponent of the low part is
+// true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
+// In general, this is
+// true_exponent_of_high - (64 - shift_value)
+//
+//
+// Largest T,t
+// ----------
+// The largest T,t is
+// data8 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))= +6.92171e-001
+//
+// Table entry 256 is
+// 0 fffe b1321ff67cba178c 51da12f4df5a0000
+//
+// The shift value is
+// 63 - (51 -(0xffff - 0xfffe)) = 13
+//
+// The true exponent of the low part is
+// true_exponent_of_high - (64 - shift_value)
+// -1 - (64-13) = -52
+// Biased as a double, this is 0x3cb
+//
+//
+//
+// So then lsb(T) must be >= 2^-51
+// msb(Klog2_hi) <= 2^12
+//
+// +--------+---------+
+// | 51 bits | <== largest T
+// +--------+---------+
+// | 9 bits | 42 bits | <== smallest T
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+
+
+
+// Special Cases
+//==============================================================
+
+// double float
+// overflow error 24 30
+
+// underflow error 25 31
+
+// X zero Y zero
+// +0 +0 +1 error 26 32
+// -0 +0 +1 error 26 32
+// +0 -0 +1 error 26 32
+// -0 -0 +1 error 26 32
+
+// X zero Y negative
+// +0 -odd integer +inf error 27 33 divide-by-zero
+// -0 -odd integer -inf error 27 33 divide-by-zero
+// +0 !-odd integer +inf error 27 33 divide-by-zero
+// -0 !-odd integer +inf error 27 33 divide-by-zero
+// +0 -inf +inf error 27 33 divide-by-zero
+// -0 -inf +inf error 27 33 divide-by-zero
+
+// X zero Y positive
+// +0 +odd integer +0
+// -0 +odd integer -0
+// +0 !+odd integer +0
+// -0 !+odd integer +0
+// +0 +inf +0
+// -0 +inf +0
+// +0 Y NaN quiet Y invalid if Y SNaN
+// -0 Y NaN quiet Y invalid if Y SNaN
+
+// X one
+// -1 Y inf +1
+// -1 Y NaN quiet Y invalid if Y SNaN
+// +1 Y NaN +1 invalid if Y SNaN
+// +1 Y any else +1
+
+// X - Y not integer QNAN error 28 34 invalid
+
+// X NaN Y 0 +1 error 29 35
+// X NaN Y NaN quiet X invalid if X or Y SNaN
+// X NaN Y any else quiet X invalid if X SNaN
+// X !+1 Y NaN quiet Y invalid if Y SNaN
+
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf
+// X -inf Y >0, odd integer -inf
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X +inf Y =0 +1
+// X -inf Y =0 +1
+
+// |X|<1 Y +inf +0
+// |X|<1 Y -inf +inf
+// |X|>1 Y +inf +inf
+// |X|>1 Y -inf +0
+
+// X any Y =0 +1
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+// integer registers used (assembler aliases for general registers; some names deliberately share one register)
+// Within r32-r71, r32 and r45 are given no name in this list.
+pow_AD_Tt = r33
+pow_GR_FFF7 = r34
+pow_GR_exp_Y = r34 // duplicate (shares r34 with pow_GR_FFF7)
+pow_GR_17ones = r35
+
+pow_AD_P = r36
+pow_AD_Q = r37
+pow_AD_tbl1 = r38
+pow_AD_tbl2 = r39
+pow_GR_exp_X = r40
+pow_GR_true_exp_X = r40 // duplicate (shares r40 with pow_GR_exp_X)
+
+pow_GR_offset = r41
+pow_GR_exp_Xm1 = r42
+pow_GR_sig_X = r43
+pow_GR_signexp_X = r44
+
+pow_GR_signexp_Xm1 = r46
+pow_GR_int_W1 = r47
+pow_GR_int_W2 = r48
+pow_GR_int_N = r49
+pow_GR_index1 = r50
+
+pow_GR_index2 = r51
+pow_AD_T1 = r52
+pow_AD_T2 = r53
+pow_GR_gt_ln = r53 // duplicate (shares r53 with pow_AD_T2)
+pow_int_GR_M = r54
+pow_GR_10033 = r55
+
+pow_GR_16ones = r56
+pow_GR_sig_int_Y = r57
+pow_GR_sign_Y_Gpr = r58
+pow_GR_17ones_m1 = r59
+pow_GR_one = r60
+pow_GR_sign_Y = r60 // duplicate (shares r60 with pow_GR_one); distinct from pow_GR_sign_Y_Gpr (r58)
+
+pow_GR_signexp_Y_Gpr = r61
+pow_GR_exp_Y_Gpr = r62
+pow_GR_true_exp_Y_Gpr = r63
+pow_GR_signexp_Y = r64
+
+GR_SAVE_B0 = r65 // GR_SAVE_*: presumably hold b0/gp/ar.pfs across the error-support call - confirm in the error region code
+GR_SAVE_GP = r66
+GR_SAVE_PFS = r67
+
+GR_Parameter_X = r68 // r68-r71 are consecutive; presumably the outgoing argument registers - confirm against the alloc
+GR_Parameter_Y = r69
+GR_Parameter_RESULT = r70
+pow_GR_tag = r71 // error tag; the special-case table above lists the float values 30-35
+
+
+// floating point registers used (aliases for f32-f123; f47, f57, f65, f106 and f113-f115 are given no name)
+// NOTE(review): notes tagged "(overview)" rest only on the name matching a symbol in the overview comment - confirm against the code.
+POW_B = f32 // (overview) B = frcpa(x)
+POW_NORM_X = f33
+POW_Xm1 = f34
+POW_r1 = f34 // duplicate (shares f34 with POW_Xm1)
+POW_P4 = f35
+
+POW_P5 = f36
+POW_NORM_Y = f37
+POW_Q2 = f38
+POW_Q3 = f39
+POW_P2 = f40
+
+POW_P3 = f41
+POW_P0 = f42
+POW_log2_lo = f43
+POW_r = f44 // (overview) r = Bx-1
+POW_Q0_half = f45
+
+POW_Q1 = f46
+POW_log2_hi = f48
+POW_Q4 = f49
+POW_P1 = f50
+
+POW_log2_by_128_hi = f51
+POW_inv_log2_by_128 = f52
+POW_rsq = f53
+POW_Yrcub = f54
+POW_log2_by_128_lo = f55
+
+POW_v6 = f56
+POW_v4 = f58
+POW_v2 = f59
+POW_T = f60 // (overview) T, high part of the tabulated log(1/Cm)
+
+POW_Tt = f61 // (overview) t, low part of the tabulated log(1/Cm)
+POW_RSHF = f62
+POW_v21ps = f63
+POW_s4 = f64
+
+POW_U = f66
+POW_G = f67 // (overview) G = K*log2_hi + T
+POW_delta = f68 // (overview) delta, the low-order companion of G
+POW_v3 = f69
+POW_V = f70
+
+POW_p = f71 // (overview) p, the tail of the series for Log(1 + (Bx-1))
+POW_Z1 = f72 // (overview) Z1..Z3 are the main terms of y*Log(x)
+POW_e3 = f73 // (overview) e1..e3 are the small terms folded into f3 = 1 + (e1 + e2 + e3)
+POW_e2 = f74
+POW_Z2 = f75
+
+POW_e1 = f76
+POW_W1 = f77
+POW_UmZ2 = f78
+POW_W2 = f79
+POW_Z3 = f80
+
+POW_int_W1 = f81
+POW_e12 = f82
+POW_int_W2 = f83
+POW_UmZ2pV = f84
+POW_Z3sq = f85
+
+POW_e123 = f86
+POW_N1float = f87 // (overview) N1 = number of log2/128 in Z1
+POW_N2float = f88 // (overview) N2 = number of log2/128 in Z2
+POW_f3 = f89
+POW_q = f90
+
+POW_s1 = f91 // (overview) s1 = Z1 - N1 log2/128
+POW_Nfloat = f92
+POW_s2 = f93 // (overview) s2 = Z2 - N2 log2/128
+POW_f2 = f94
+POW_f1 = f95
+
+POW_T1 = f96
+POW_T2 = f97
+POW_2M = f98
+POW_s = f99 // (overview) s = s1 + s2
+POW_f12 = f100
+
+POW_ssq = f101
+POW_T1T2 = f102
+POW_1ps = f103
+POW_A = f104 // (overview) A = exp(Z) f3; the result is A + A(exp(Z3) - 1)
+POW_es = f105
+
+POW_int_K = f107
+POW_K = f108
+POW_f123 = f109
+POW_Gpr = f110
+
+POW_Y_Gpr = f111
+POW_int_Y = f112
+
+POW_float_int_Y = f116
+POW_ftz_urm_f8 = f117
+POW_wre_urm_f8 = f118
+POW_abs_A = f119
+POW_gt_pln = f120
+
+POW_xsq = f121
+
+POW_twoV = f122
+POW_Xp1 = f123
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+pow_table_P: // NOTE(review): decoded values below assume "data8 sig, exp" pairs are 80-bit extended (64-bit significand, then sign+exponent) - confirm against the loads
+ASM_TYPE_DIRECTIVE(pow_table_P,@object)
+data8 0x8000F7B249FF332D, 0x0000BFFC // P_5, approx -1/8
+data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3, approx -1/6
+data8 0x80000000000018E5, 0x0000BFFD // P_1, approx -1/4
+data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128, i.e. 128/ln(2), approx 184.66
+
+// The single-word entries below decode as IEEE doubles: Q_0 = 1/2 exactly, Q_1..Q_4 close to 1/6, 1/24, 1/120, 1/720.
+data8 0x3FA5555555554A9E // Q_2, approx 1/24
+data8 0x3F8111124F4DD9F9 // Q_3, approx 1/120
+data8 0x3FE0000000000000 // Q_0, exactly 0.5
+data8 0x3FC5555555554733 // Q_1, approx 1/6
+data8 0x3F56C16D9360FFA0 // Q_4, approx 1/720
+data8 0x43e8000000000000 // Right shift constant for exp (as a double: 1.5 * 2^63)
+data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo (exponent word is 0x40 below that of ln2_by_128_hi in pow_table_Q)
+data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
+data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
+ASM_SIZE_DIRECTIVE(pow_table_P)
+
+pow_table_Q: // NOTE(review): decoded values assume "data8 sig, exp" pairs are 80-bit extended (significand, then sign+exponent) - confirm against the loads
+ASM_TYPE_DIRECTIVE(pow_table_Q,@object)
+data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4, approx 1/7
+data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2, approx 1/5
+data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0, approx 1/3
+data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001 (two doubles; same hi/lo pair derived in the table-generation notes)
+data8 0xb17217f7d1cf79ab, 0x00003ff7 // ln2_by_128_hi, approx 5.4152e-003 (ln(2)/128)
+ASM_SIZE_DIRECTIVE(pow_table_Q)
+
+
+pow_Tt:
+ASM_TYPE_DIRECTIVE(pow_Tt,@object)
+data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003
+data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003
+data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003
+data8 0x3f8c317384c75f00, 0x3c69806208c04c22 // log(1/frcpa(1+3/256))= +1.37662e-002
+data8 0x3f91a6b91ac73380, 0x3c7874daa716eb32 // log(1/frcpa(1+4/256))= +1.72376e-002
+data8 0x3f95ba9a5d9ac000, 0x3cacbb84e08d78ac // log(1/frcpa(1+5/256))= +2.12196e-002
+data8 0x3f99d2a807432580, 0x3cbcf80538b441e1 // log(1/frcpa(1+6/256))= +2.52177e-002
+data8 0x3f9d6b2725979800, 0x3c6095e5c8f8f359 // log(1/frcpa(1+7/256))= +2.87291e-002
+data8 0x3fa0c58fa19dfa80, 0x3cb4c5d4e9d0dda2 // log(1/frcpa(1+8/256))= +3.27573e-002
+data8 0x3fa2954c78cbce00, 0x3caa932b860ab8d6 // log(1/frcpa(1+9/256))= +3.62953e-002
+data8 0x3fa4a94d2da96c40, 0x3ca670452b76bbd5 // log(1/frcpa(1+10/256))= +4.03542e-002
+data8 0x3fa67c94f2d4bb40, 0x3ca84104f9941798 // log(1/frcpa(1+11/256))= +4.39192e-002
+data8 0x3fa85188b630f040, 0x3cb40a882cbf0153 // log(1/frcpa(1+12/256))= +4.74971e-002
+data8 0x3faa6b8abe73af40, 0x3c988d46e25c9059 // log(1/frcpa(1+13/256))= +5.16017e-002
+data8 0x3fac441e06f72a80, 0x3cae3e930a1a2a96 // log(1/frcpa(1+14/256))= +5.52072e-002
+data8 0x3fae1e6713606d00, 0x3c8a796f6283b580 // log(1/frcpa(1+15/256))= +5.88257e-002
+data8 0x3faffa6911ab9300, 0x3c5193070351e88a // log(1/frcpa(1+16/256))= +6.24574e-002
+data8 0x3fb0ec139c5da600, 0x3c623f2a75eb992d // log(1/frcpa(1+17/256))= +6.61022e-002
+data8 0x3fb1dbd2643d1900, 0x3ca649b2ef8927f0 // log(1/frcpa(1+18/256))= +6.97605e-002
+data8 0x3fb2cc7284fe5f00, 0x3cbc5e86599513e2 // log(1/frcpa(1+19/256))= +7.34321e-002
+data8 0x3fb3bdf5a7d1ee60, 0x3c90bd4bb69dada3 // log(1/frcpa(1+20/256))= +7.71173e-002
+data8 0x3fb4b05d7aa012e0, 0x3c54e377c9b8a54f // log(1/frcpa(1+21/256))= +8.08161e-002
+data8 0x3fb580db7ceb5700, 0x3c7fdb2f98354cde // log(1/frcpa(1+22/256))= +8.39975e-002
+data8 0x3fb674f089365a60, 0x3cb9994c9d3301c1 // log(1/frcpa(1+23/256))= +8.77219e-002
+data8 0x3fb769ef2c6b5680, 0x3caaec639db52a79 // log(1/frcpa(1+24/256))= +9.14602e-002
+data8 0x3fb85fd927506a40, 0x3c9f9f99a3cf8e25 // log(1/frcpa(1+25/256))= +9.52125e-002
+data8 0x3fb9335e5d594980, 0x3ca15c3abd47d99a // log(1/frcpa(1+26/256))= +9.84401e-002
+data8 0x3fba2b0220c8e5e0, 0x3cb4ca639adf6fc3 // log(1/frcpa(1+27/256))= +1.02219e-001
+data8 0x3fbb0004ac1a86a0, 0x3ca7cb81bf959a59 // log(1/frcpa(1+28/256))= +1.05469e-001
+data8 0x3fbbf968769fca00, 0x3cb0c646c121418e // log(1/frcpa(1+29/256))= +1.09274e-001
+data8 0x3fbccfedbfee13a0, 0x3ca0465fce24ab4b // log(1/frcpa(1+30/256))= +1.12548e-001
+data8 0x3fbda727638446a0, 0x3c82803f4e2e6603 // log(1/frcpa(1+31/256))= +1.15832e-001
+data8 0x3fbea3257fe10f60, 0x3cb986a3f2313d1a // log(1/frcpa(1+32/256))= +1.19677e-001
+data8 0x3fbf7be9fedbfde0, 0x3c97d16a6a621cf4 // log(1/frcpa(1+33/256))= +1.22985e-001
+data8 0x3fc02ab352ff25f0, 0x3c9cc6baad365600 // log(1/frcpa(1+34/256))= +1.26303e-001
+data8 0x3fc097ce579d2040, 0x3cb9ba16d329440b // log(1/frcpa(1+35/256))= +1.29633e-001
+data8 0x3fc1178e8227e470, 0x3cb7bc671683f8e6 // log(1/frcpa(1+36/256))= +1.33531e-001
+data8 0x3fc185747dbecf30, 0x3c9d1116f66d2345 // log(1/frcpa(1+37/256))= +1.36885e-001
+data8 0x3fc1f3b925f25d40, 0x3c8162c9ef939ac6 // log(1/frcpa(1+38/256))= +1.40250e-001
+data8 0x3fc2625d1e6ddf50, 0x3caad3a1ec384fc3 // log(1/frcpa(1+39/256))= +1.43627e-001
+data8 0x3fc2d1610c868130, 0x3cb3ad997036941b // log(1/frcpa(1+40/256))= +1.47015e-001
+data8 0x3fc340c597411420, 0x3cbc2308262c7998 // log(1/frcpa(1+41/256))= +1.50414e-001
+data8 0x3fc3b08b6757f2a0, 0x3cb2170d6cdf0526 // log(1/frcpa(1+42/256))= +1.53825e-001
+data8 0x3fc40dfb08378000, 0x3c9bb453c4f7b685 // log(1/frcpa(1+43/256))= +1.56677e-001
+data8 0x3fc47e74e8ca5f70, 0x3cb836a48fdfce9d // log(1/frcpa(1+44/256))= +1.60109e-001
+data8 0x3fc4ef51f6466de0, 0x3ca07a43919aa64b // log(1/frcpa(1+45/256))= +1.63553e-001
+data8 0x3fc56092e02ba510, 0x3ca85006899d97b0 // log(1/frcpa(1+46/256))= +1.67010e-001
+data8 0x3fc5d23857cd74d0, 0x3ca30a5ba6e7abbe // log(1/frcpa(1+47/256))= +1.70478e-001
+data8 0x3fc6313a37335d70, 0x3ca905586f0ac97e // log(1/frcpa(1+48/256))= +1.73377e-001
+data8 0x3fc6a399dabbd380, 0x3c9b2c6657a96684 // log(1/frcpa(1+49/256))= +1.76868e-001
+data8 0x3fc70337dd3ce410, 0x3cb50bc52f55cdd8 // log(1/frcpa(1+50/256))= +1.79786e-001
+data8 0x3fc77654128f6120, 0x3cad2eb7c9a39efe // log(1/frcpa(1+51/256))= +1.83299e-001
+data8 0x3fc7e9d82a0b0220, 0x3cba127e90393c01 // log(1/frcpa(1+52/256))= +1.86824e-001
+data8 0x3fc84a6b759f5120, 0x3cbd7fd52079f706 // log(1/frcpa(1+53/256))= +1.89771e-001
+data8 0x3fc8ab47d5f5a300, 0x3cbfae141751a3de // log(1/frcpa(1+54/256))= +1.92727e-001
+data8 0x3fc91fe490965810, 0x3cb69cf30a1c319e // log(1/frcpa(1+55/256))= +1.96286e-001
+data8 0x3fc981634011aa70, 0x3ca5bb3d208bc42a // log(1/frcpa(1+56/256))= +1.99261e-001
+data8 0x3fc9f6c407089660, 0x3ca04d68658179a0 // log(1/frcpa(1+57/256))= +2.02843e-001
+data8 0x3fca58e729348f40, 0x3c99f5411546c286 // log(1/frcpa(1+58/256))= +2.05838e-001
+data8 0x3fcabb55c31693a0, 0x3cb9a5350eb327d5 // log(1/frcpa(1+59/256))= +2.08842e-001
+data8 0x3fcb1e104919efd0, 0x3c18965fcce7c406 // log(1/frcpa(1+60/256))= +2.11855e-001
+data8 0x3fcb94ee93e367c0, 0x3cb503716da45184 // log(1/frcpa(1+61/256))= +2.15483e-001
+data8 0x3fcbf851c0675550, 0x3cbdf1b3f7ab5378 // log(1/frcpa(1+62/256))= +2.18516e-001
+data8 0x3fcc5c0254bf23a0, 0x3ca7aab9ed0b1d7b // log(1/frcpa(1+63/256))= +2.21558e-001
+data8 0x3fccc000c9db3c50, 0x3c92a7a2a850072a // log(1/frcpa(1+64/256))= +2.24609e-001
+data8 0x3fcd244d99c85670, 0x3c9f6019120edf4c // log(1/frcpa(1+65/256))= +2.27670e-001
+data8 0x3fcd88e93fb2f450, 0x3c6affb96815e081 // log(1/frcpa(1+66/256))= +2.30741e-001
+data8 0x3fcdedd437eaef00, 0x3c72553595897976 // log(1/frcpa(1+67/256))= +2.33820e-001
+data8 0x3fce530effe71010, 0x3c90913b020fa182 // log(1/frcpa(1+68/256))= +2.36910e-001
+data8 0x3fceb89a1648b970, 0x3c837ba4045bfd25 // log(1/frcpa(1+69/256))= +2.40009e-001
+data8 0x3fcf1e75fadf9bd0, 0x3cbcea6d13e0498d // log(1/frcpa(1+70/256))= +2.43117e-001
+data8 0x3fcf84a32ead7c30, 0x3ca5e3a67b3c6d77 // log(1/frcpa(1+71/256))= +2.46235e-001
+data8 0x3fcfeb2233ea07c0, 0x3cba0c6f0049c5a6 // log(1/frcpa(1+72/256))= +2.49363e-001
+data8 0x3fd028f9c7035c18, 0x3cb0a30b06677ff6 // log(1/frcpa(1+73/256))= +2.52501e-001
+data8 0x3fd05c8be0d96358, 0x3ca0f1c77ccb5865 // log(1/frcpa(1+74/256))= +2.55649e-001
+data8 0x3fd085eb8f8ae790, 0x3cbd513f45fe7a97 // log(1/frcpa(1+75/256))= +2.58174e-001
+data8 0x3fd0b9c8e32d1910, 0x3c927449047ca006 // log(1/frcpa(1+76/256))= +2.61339e-001
+data8 0x3fd0edd060b78080, 0x3c89b52d8435f53e // log(1/frcpa(1+77/256))= +2.64515e-001
+data8 0x3fd122024cf00638, 0x3cbdd976fabda4bd // log(1/frcpa(1+78/256))= +2.67701e-001
+data8 0x3fd14be2927aecd0, 0x3cb02f90ad0bc471 // log(1/frcpa(1+79/256))= +2.70257e-001
+data8 0x3fd180618ef18ad8, 0x3cbd003792c71a98 // log(1/frcpa(1+80/256))= +2.73461e-001
+data8 0x3fd1b50bbe2fc638, 0x3ca9ae64c6403ead // log(1/frcpa(1+81/256))= +2.76675e-001
+data8 0x3fd1df4cc7cf2428, 0x3cb43f0455f7e395 // log(1/frcpa(1+82/256))= +2.79254e-001
+data8 0x3fd214456d0eb8d0, 0x3cb0fbd748d75d30 // log(1/frcpa(1+83/256))= +2.82487e-001
+data8 0x3fd23ec5991eba48, 0x3c906edd746b77e2 // log(1/frcpa(1+84/256))= +2.85081e-001
+data8 0x3fd2740d9f870af8, 0x3ca9802e6a00a670 // log(1/frcpa(1+85/256))= +2.88333e-001
+data8 0x3fd29ecdabcdfa00, 0x3cacecef70890cfa // log(1/frcpa(1+86/256))= +2.90943e-001
+data8 0x3fd2d46602adcce8, 0x3cb97911955f3521 // log(1/frcpa(1+87/256))= +2.94214e-001
+data8 0x3fd2ff66b04ea9d0, 0x3cb12dabe191d1c9 // log(1/frcpa(1+88/256))= +2.96838e-001
+data8 0x3fd335504b355a30, 0x3cbdf9139df924ec // log(1/frcpa(1+89/256))= +3.00129e-001
+data8 0x3fd360925ec44f58, 0x3cb253e68977a1e3 // log(1/frcpa(1+90/256))= +3.02769e-001
+data8 0x3fd38bf1c3337e70, 0x3cb3d283d2a2da21 // log(1/frcpa(1+91/256))= +3.05417e-001
+data8 0x3fd3c25277333180, 0x3cadaa5b035eae27 // log(1/frcpa(1+92/256))= +3.08735e-001
+data8 0x3fd3edf463c16838, 0x3cb983d680d3c108 // log(1/frcpa(1+93/256))= +3.11399e-001
+data8 0x3fd419b423d5e8c0, 0x3cbc86dd921c139d // log(1/frcpa(1+94/256))= +3.14069e-001
+data8 0x3fd44591e0539f48, 0x3c86a76d6dc2782e // log(1/frcpa(1+95/256))= +3.16746e-001
+data8 0x3fd47c9175b6f0a8, 0x3cb59a2e013c6b5f // log(1/frcpa(1+96/256))= +3.20103e-001
+data8 0x3fd4a8b341552b08, 0x3c93f1e86e468694 // log(1/frcpa(1+97/256))= +3.22797e-001
+data8 0x3fd4d4f390890198, 0x3cbf5e4ea7c5105a // log(1/frcpa(1+98/256))= +3.25498e-001
+data8 0x3fd501528da1f960, 0x3cbf58da53e9ad10 // log(1/frcpa(1+99/256))= +3.28206e-001
+data8 0x3fd52dd06347d4f0, 0x3cb98a28cebf6eef // log(1/frcpa(1+100/256))= +3.30921e-001
+data8 0x3fd55a6d3c7b8a88, 0x3c9c76b67c2d1fd4 // log(1/frcpa(1+101/256))= +3.33644e-001
+data8 0x3fd5925d2b112a58, 0x3c9029616a4331b8 // log(1/frcpa(1+102/256))= +3.37058e-001
+data8 0x3fd5bf406b543db0, 0x3c9fb8292ecfc820 // log(1/frcpa(1+103/256))= +3.39798e-001
+data8 0x3fd5ec433d5c35a8, 0x3cb71a1229d17eec // log(1/frcpa(1+104/256))= +3.42545e-001
+data8 0x3fd61965cdb02c18, 0x3cbba94fe1dbb8d2 // log(1/frcpa(1+105/256))= +3.45300e-001
+data8 0x3fd646a84935b2a0, 0x3c9ee496d2c9ae57 // log(1/frcpa(1+106/256))= +3.48063e-001
+data8 0x3fd6740add31de90, 0x3cb1da3a6c7a9dfd // log(1/frcpa(1+107/256))= +3.50833e-001
+data8 0x3fd6a18db74a58c0, 0x3cb494c257add8dc // log(1/frcpa(1+108/256))= +3.53610e-001
+data8 0x3fd6cf31058670e8, 0x3cb0b244a70a8da9 // log(1/frcpa(1+109/256))= +3.56396e-001
+data8 0x3fd6f180e852f0b8, 0x3c9db7aefa866720 // log(1/frcpa(1+110/256))= +3.58490e-001
+data8 0x3fd71f5d71b894e8, 0x3cbe91c4bf324957 // log(1/frcpa(1+111/256))= +3.61289e-001
+data8 0x3fd74d5aefd66d58, 0x3cb06b3d9bfac023 // log(1/frcpa(1+112/256))= +3.64096e-001
+data8 0x3fd77b79922bd378, 0x3cb727d8804491f4 // log(1/frcpa(1+113/256))= +3.66911e-001
+data8 0x3fd7a9b9889f19e0, 0x3ca2ef22df5bc543 // log(1/frcpa(1+114/256))= +3.69734e-001
+data8 0x3fd7d81b037eb6a0, 0x3cb8fd3ba07a7ece // log(1/frcpa(1+115/256))= +3.72565e-001
+data8 0x3fd8069e33827230, 0x3c8bd1e25866e61a // log(1/frcpa(1+116/256))= +3.75404e-001
+data8 0x3fd82996d3ef8bc8, 0x3ca5aab9f5928928 // log(1/frcpa(1+117/256))= +3.77538e-001
+data8 0x3fd85855776dcbf8, 0x3ca56f33337789d6 // log(1/frcpa(1+118/256))= +3.80391e-001
+data8 0x3fd8873658327cc8, 0x3cbb8ef0401db49d // log(1/frcpa(1+119/256))= +3.83253e-001
+data8 0x3fd8aa75973ab8c8, 0x3cbb9961f509a680 // log(1/frcpa(1+120/256))= +3.85404e-001
+data8 0x3fd8d992dc8824e0, 0x3cb220512a53732d // log(1/frcpa(1+121/256))= +3.88280e-001
+data8 0x3fd908d2ea7d9510, 0x3c985f0e513bfb5c // log(1/frcpa(1+122/256))= +3.91164e-001
+data8 0x3fd92c59e79c0e50, 0x3cb82e073fd30d63 // log(1/frcpa(1+123/256))= +3.93332e-001
+data8 0x3fd95bd750ee3ed0, 0x3ca4aa7cdb6dd8a8 // log(1/frcpa(1+124/256))= +3.96231e-001
+data8 0x3fd98b7811a3ee58, 0x3caa93a5b660893e // log(1/frcpa(1+125/256))= +3.99138e-001
+data8 0x3fd9af47f33d4068, 0x3cac294b3b3190ba // log(1/frcpa(1+126/256))= +4.01323e-001
+data8 0x3fd9df270c1914a0, 0x3cbe1a58fd0cd67e // log(1/frcpa(1+127/256))= +4.04245e-001
+data8 0x3fda0325ed14fda0, 0x3cb1efa7950fb57e // log(1/frcpa(1+128/256))= +4.06442e-001
+data8 0x3fda33440224fa78, 0x3c8915fe75e7d477 // log(1/frcpa(1+129/256))= +4.09379e-001
+data8 0x3fda57725e80c380, 0x3ca72bd1062b1b7f // log(1/frcpa(1+130/256))= +4.11587e-001
+data8 0x3fda87d0165dd198, 0x3c91f7845f58dbad // log(1/frcpa(1+131/256))= +4.14539e-001
+data8 0x3fdaac2e6c03f890, 0x3cb6f237a911c509 // log(1/frcpa(1+132/256))= +4.16759e-001
+data8 0x3fdadccc6fdf6a80, 0x3c90ddc4b7687169 // log(1/frcpa(1+133/256))= +4.19726e-001
+data8 0x3fdb015b3eb1e790, 0x3c692dd7d90e1e8e // log(1/frcpa(1+134/256))= +4.21958e-001
+data8 0x3fdb323a3a635948, 0x3c6f85655cbe14de // log(1/frcpa(1+135/256))= +4.24941e-001
+data8 0x3fdb56fa04462908, 0x3c95252d841994de // log(1/frcpa(1+136/256))= +4.27184e-001
+data8 0x3fdb881aa659bc90, 0x3caa53a745a3642f // log(1/frcpa(1+137/256))= +4.30182e-001
+data8 0x3fdbad0bef3db160, 0x3cb32f2540dcc16a // log(1/frcpa(1+138/256))= +4.32437e-001
+data8 0x3fdbd21297781c28, 0x3cbd8e891e106f1d // log(1/frcpa(1+139/256))= +4.34697e-001
+data8 0x3fdc039236f08818, 0x3c809435af522ba7 // log(1/frcpa(1+140/256))= +4.37718e-001
+data8 0x3fdc28cb1e4d32f8, 0x3cb3944752fbd81e // log(1/frcpa(1+141/256))= +4.39990e-001
+data8 0x3fdc4e19b84723c0, 0x3c9a465260cd3fe5 // log(1/frcpa(1+142/256))= +4.42267e-001
+data8 0x3fdc7ff9c74554c8, 0x3c92447d5b6ca369 // log(1/frcpa(1+143/256))= +4.45311e-001
+data8 0x3fdca57b64e9db00, 0x3cb44344a8a00c82 // log(1/frcpa(1+144/256))= +4.47600e-001
+data8 0x3fdccb130a5ceba8, 0x3cbefaddfb97b73f // log(1/frcpa(1+145/256))= +4.49895e-001
+data8 0x3fdcf0c0d18f3268, 0x3cbd3e7bfee57898 // log(1/frcpa(1+146/256))= +4.52194e-001
+data8 0x3fdd232075b5a200, 0x3c9222599987447c // log(1/frcpa(1+147/256))= +4.55269e-001
+data8 0x3fdd490246defa68, 0x3cabafe9a767a80d // log(1/frcpa(1+148/256))= +4.57581e-001
+data8 0x3fdd6efa918d25c8, 0x3cb58a2624e1c6fd // log(1/frcpa(1+149/256))= +4.59899e-001
+data8 0x3fdd9509707ae528, 0x3cbdc3babce578e7 // log(1/frcpa(1+150/256))= +4.62221e-001
+data8 0x3fddbb2efe92c550, 0x3cb0ac0943c434a4 // log(1/frcpa(1+151/256))= +4.64550e-001
+data8 0x3fddee2f3445e4a8, 0x3cbba9d07ce820e8 // log(1/frcpa(1+152/256))= +4.67663e-001
+data8 0x3fde148a1a2726c8, 0x3cb6537e3375b205 // log(1/frcpa(1+153/256))= +4.70004e-001
+data8 0x3fde3afc0a49ff38, 0x3cbfed5518dbc20e // log(1/frcpa(1+154/256))= +4.72350e-001
+data8 0x3fde6185206d5168, 0x3cb6572601f73d5c // log(1/frcpa(1+155/256))= +4.74702e-001
+data8 0x3fde882578823d50, 0x3c9b24abd4584d1a // log(1/frcpa(1+156/256))= +4.77060e-001
+data8 0x3fdeaedd2eac9908, 0x3cb0ceb5e4d2c8f7 // log(1/frcpa(1+157/256))= +4.79423e-001
+data8 0x3fded5ac5f436be0, 0x3ca72f21f1f5238e // log(1/frcpa(1+158/256))= +4.81792e-001
+data8 0x3fdefc9326d16ab8, 0x3c85081a1639a45c // log(1/frcpa(1+159/256))= +4.84166e-001
+data8 0x3fdf2391a21575f8, 0x3cbf11015bdd297a // log(1/frcpa(1+160/256))= +4.86546e-001
+data8 0x3fdf4aa7ee031928, 0x3cb3795bc052a2d1 // log(1/frcpa(1+161/256))= +4.88932e-001
+data8 0x3fdf71d627c30bb0, 0x3c35c61f0f5a88f3 // log(1/frcpa(1+162/256))= +4.91323e-001
+data8 0x3fdf991c6cb3b378, 0x3c97d99419be6028 // log(1/frcpa(1+163/256))= +4.93720e-001
+data8 0x3fdfc07ada69a908, 0x3cbfe9341ded70b1 // log(1/frcpa(1+164/256))= +4.96123e-001
+data8 0x3fdfe7f18eb03d38, 0x3cb85718a640c33f // log(1/frcpa(1+165/256))= +4.98532e-001
+data8 0x3fe007c053c5002c, 0x3cb3addc9c065f09 // log(1/frcpa(1+166/256))= +5.00946e-001
+data8 0x3fe01b942198a5a0, 0x3c9d5aa4c77da6ac // log(1/frcpa(1+167/256))= +5.03367e-001
+data8 0x3fe02f74400c64e8, 0x3cb5a0ee4450ef52 // log(1/frcpa(1+168/256))= +5.05793e-001
+data8 0x3fe04360be7603ac, 0x3c9dd00c35630fe0 // log(1/frcpa(1+169/256))= +5.08225e-001
+data8 0x3fe05759ac47fe30, 0x3cbd063e1f0bd82c // log(1/frcpa(1+170/256))= +5.10663e-001
+data8 0x3fe06b5f1911cf50, 0x3cae8da674af5289 // log(1/frcpa(1+171/256))= +5.13107e-001
+data8 0x3fe078bf0533c568, 0x3c62241edf5fd1f7 // log(1/frcpa(1+172/256))= +5.14740e-001
+data8 0x3fe08cd9687e7b0c, 0x3cb3007febcca227 // log(1/frcpa(1+173/256))= +5.17194e-001
+data8 0x3fe0a10074cf9018, 0x3ca496e84603816b // log(1/frcpa(1+174/256))= +5.19654e-001
+data8 0x3fe0b5343a234474, 0x3cb46098d14fc90a // log(1/frcpa(1+175/256))= +5.22120e-001
+data8 0x3fe0c974c89431cc, 0x3cac0a7cdcbb86c6 // log(1/frcpa(1+176/256))= +5.24592e-001
+data8 0x3fe0ddc2305b9884, 0x3cb2f753210410ff // log(1/frcpa(1+177/256))= +5.27070e-001
+data8 0x3fe0eb524bafc918, 0x3c88affd6682229e // log(1/frcpa(1+178/256))= +5.28726e-001
+data8 0x3fe0ffb54213a474, 0x3cadeefbab9af993 // log(1/frcpa(1+179/256))= +5.31214e-001
+data8 0x3fe114253da97d9c, 0x3cbaf1c2b8bc160a // log(1/frcpa(1+180/256))= +5.33709e-001
+data8 0x3fe128a24f1d9afc, 0x3cb9cf4df375e650 // log(1/frcpa(1+181/256))= +5.36210e-001
+data8 0x3fe1365252bf0864, 0x3c985a621d4be111 // log(1/frcpa(1+182/256))= +5.37881e-001
+data8 0x3fe14ae558b4a92c, 0x3ca104c4aa8977d1 // log(1/frcpa(1+183/256))= +5.40393e-001
+data8 0x3fe15f85a19c7658, 0x3cbadf26e540f375 // log(1/frcpa(1+184/256))= +5.42910e-001
+data8 0x3fe16d4d38c119f8, 0x3cb3aea11caec416 // log(1/frcpa(1+185/256))= +5.44592e-001
+data8 0x3fe18203c20dd130, 0x3cba82d1211d1d6d // log(1/frcpa(1+186/256))= +5.47121e-001
+data8 0x3fe196c7bc4b1f38, 0x3cb6267acc4f4f4a // log(1/frcpa(1+187/256))= +5.49656e-001
+data8 0x3fe1a4a738b7a33c, 0x3c858930213c987d // log(1/frcpa(1+188/256))= +5.51349e-001
+data8 0x3fe1b981c0c9653c, 0x3c9bc2a4a30f697b // log(1/frcpa(1+189/256))= +5.53895e-001
+data8 0x3fe1ce69e8bb1068, 0x3cb7ae6199cf2a00 // log(1/frcpa(1+190/256))= +5.56447e-001
+data8 0x3fe1dc619de06944, 0x3c6b50bb38388177 // log(1/frcpa(1+191/256))= +5.58152e-001
+data8 0x3fe1f160a2ad0da0, 0x3cbd05b2778a5e1d // log(1/frcpa(1+192/256))= +5.60715e-001
+data8 0x3fe2066d7740737c, 0x3cb32e828f9c6bd6 // log(1/frcpa(1+193/256))= +5.63285e-001
+data8 0x3fe2147dba47a390, 0x3cbd579851b8b672 // log(1/frcpa(1+194/256))= +5.65001e-001
+data8 0x3fe229a1bc5ebac0, 0x3cbb321be5237ce8 // log(1/frcpa(1+195/256))= +5.67582e-001
+data8 0x3fe237c1841a502c, 0x3cb3b56e0915ea64 // log(1/frcpa(1+196/256))= +5.69306e-001
+data8 0x3fe24cfce6f80d98, 0x3cb34a4d1a422919 // log(1/frcpa(1+197/256))= +5.71898e-001
+data8 0x3fe25b2c55cd5760, 0x3cb237401ea5015e // log(1/frcpa(1+198/256))= +5.73630e-001
+data8 0x3fe2707f4d5f7c40, 0x3c9d30f20acc8341 // log(1/frcpa(1+199/256))= +5.76233e-001
+data8 0x3fe285e0842ca380, 0x3cbc4d866d5f21c0 // log(1/frcpa(1+200/256))= +5.78842e-001
+data8 0x3fe294294708b770, 0x3cb85e14d5dc54fa // log(1/frcpa(1+201/256))= +5.80586e-001
+data8 0x3fe2a9a2670aff0c, 0x3c7e6f8f468bbf91 // log(1/frcpa(1+202/256))= +5.83207e-001
+data8 0x3fe2b7fb2c8d1cc0, 0x3c930ffcf63c8b65 // log(1/frcpa(1+203/256))= +5.84959e-001
+data8 0x3fe2c65a6395f5f4, 0x3ca0afe20b53d2d2 // log(1/frcpa(1+204/256))= +5.86713e-001
+data8 0x3fe2dbf557b0df40, 0x3cb646be1188fbc9 // log(1/frcpa(1+205/256))= +5.89350e-001
+data8 0x3fe2ea64c3f97654, 0x3c96516fa8df33b2 // log(1/frcpa(1+206/256))= +5.91113e-001
+data8 0x3fe3001823684d70, 0x3cb96d64e16d1360 // log(1/frcpa(1+207/256))= +5.93762e-001
+data8 0x3fe30e97e9a8b5cc, 0x3c98ef96bc97cca0 // log(1/frcpa(1+208/256))= +5.95531e-001
+data8 0x3fe32463ebdd34e8, 0x3caef1dc9a56c1bf // log(1/frcpa(1+209/256))= +5.98192e-001
+data8 0x3fe332f4314ad794, 0x3caa4f0ac5d5fa11 // log(1/frcpa(1+210/256))= +5.99970e-001
+data8 0x3fe348d90e7464cc, 0x3cbe7889f0516acd // log(1/frcpa(1+211/256))= +6.02643e-001
+data8 0x3fe35779f8c43d6c, 0x3ca96bbab7245411 // log(1/frcpa(1+212/256))= +6.04428e-001
+data8 0x3fe36621961a6a98, 0x3ca31f32262db9fb // log(1/frcpa(1+213/256))= +6.06217e-001
+data8 0x3fe37c299f3c3668, 0x3cb15c72c107ee29 // log(1/frcpa(1+214/256))= +6.08907e-001
+data8 0x3fe38ae2171976e4, 0x3cba42a2554b2dd4 // log(1/frcpa(1+215/256))= +6.10704e-001
+data8 0x3fe399a157a603e4, 0x3cb99c62286d8919 // log(1/frcpa(1+216/256))= +6.12504e-001
+data8 0x3fe3afccfe77b9d0, 0x3ca11048f96a43bd // log(1/frcpa(1+217/256))= +6.15210e-001
+data8 0x3fe3be9d503533b4, 0x3ca4022f47588c3e // log(1/frcpa(1+218/256))= +6.17018e-001
+data8 0x3fe3cd7480b4a8a0, 0x3cb4ba7afc2dc56a // log(1/frcpa(1+219/256))= +6.18830e-001
+data8 0x3fe3e3c43918f76c, 0x3c859673d064b8ba // log(1/frcpa(1+220/256))= +6.21554e-001
+data8 0x3fe3f2acb27ed6c4, 0x3cb55c6b452a16a8 // log(1/frcpa(1+221/256))= +6.23373e-001
+data8 0x3fe4019c2125ca90, 0x3cb8c367879c5a31 // log(1/frcpa(1+222/256))= +6.25197e-001
+data8 0x3fe4181061389720, 0x3cb2c17a79c5cc6c // log(1/frcpa(1+223/256))= +6.27937e-001
+data8 0x3fe42711518df544, 0x3ca5f38d47012fc5 // log(1/frcpa(1+224/256))= +6.29769e-001
+data8 0x3fe436194e12b6bc, 0x3cb9854d65a9b426 // log(1/frcpa(1+225/256))= +6.31604e-001
+data8 0x3fe445285d68ea68, 0x3ca3ff9b3a81cd81 // log(1/frcpa(1+226/256))= +6.33442e-001
+data8 0x3fe45bcc464c8938, 0x3cb0a2d8011a6c05 // log(1/frcpa(1+227/256))= +6.36206e-001
+data8 0x3fe46aed21f117fc, 0x3c8a2be41f8e9f3d // log(1/frcpa(1+228/256))= +6.38053e-001
+data8 0x3fe47a1527e8a2d0, 0x3cba4a83594fab09 // log(1/frcpa(1+229/256))= +6.39903e-001
+data8 0x3fe489445efffcc8, 0x3cbf306a23dcbcde // log(1/frcpa(1+230/256))= +6.41756e-001
+data8 0x3fe4a018bcb69834, 0x3ca46c9285029fd1 // log(1/frcpa(1+231/256))= +6.44543e-001
+data8 0x3fe4af5a0c9d65d4, 0x3cbbc1db897580e3 // log(1/frcpa(1+232/256))= +6.46405e-001
+data8 0x3fe4bea2a5bdbe84, 0x3cb84d880d7ef775 // log(1/frcpa(1+233/256))= +6.48271e-001
+data8 0x3fe4cdf28f10ac44, 0x3cb3ec4b7893ce1f // log(1/frcpa(1+234/256))= +6.50140e-001
+data8 0x3fe4dd49cf994058, 0x3c897224d59d3408 // log(1/frcpa(1+235/256))= +6.52013e-001
+data8 0x3fe4eca86e64a680, 0x3cbccf620f24f0cd // log(1/frcpa(1+236/256))= +6.53889e-001
+data8 0x3fe503c43cd8eb68, 0x3c3f872c65971084 // log(1/frcpa(1+237/256))= +6.56710e-001
+data8 0x3fe513356667fc54, 0x3cb9ca64cc3d52c8 // log(1/frcpa(1+238/256))= +6.58595e-001
+data8 0x3fe522ae0738a3d4, 0x3cbe708164c75968 // log(1/frcpa(1+239/256))= +6.60483e-001
+data8 0x3fe5322e26867854, 0x3cb9988ba4aea615 // log(1/frcpa(1+240/256))= +6.62376e-001
+data8 0x3fe541b5cb979808, 0x3ca1662e3a6b95f5 // log(1/frcpa(1+241/256))= +6.64271e-001
+data8 0x3fe55144fdbcbd60, 0x3cb3acd4ca45c1e0 // log(1/frcpa(1+242/256))= +6.66171e-001
+data8 0x3fe560dbc45153c4, 0x3cb4988947959fed // log(1/frcpa(1+243/256))= +6.68074e-001
+data8 0x3fe5707a26bb8c64, 0x3cb3017fe6607ba9 // log(1/frcpa(1+244/256))= +6.69980e-001
+data8 0x3fe587f60ed5b8fc, 0x3cbe7a3266366ed4 // log(1/frcpa(1+245/256))= +6.72847e-001
+data8 0x3fe597a7977c8f30, 0x3ca1e12b9959a90e // log(1/frcpa(1+246/256))= +6.74763e-001
+data8 0x3fe5a760d634bb88, 0x3cb7c365e53d9602 // log(1/frcpa(1+247/256))= +6.76682e-001
+data8 0x3fe5b721d295f10c, 0x3cb716c2551ccbf0 // log(1/frcpa(1+248/256))= +6.78605e-001
+data8 0x3fe5c6ea94431ef8, 0x3ca02b2ed0e28261 // log(1/frcpa(1+249/256))= +6.80532e-001
+data8 0x3fe5d6bb22ea86f4, 0x3caf43a8bbb2f974 // log(1/frcpa(1+250/256))= +6.82462e-001
+data8 0x3fe5e6938645d38c, 0x3cbcedc98821b333 // log(1/frcpa(1+251/256))= +6.84397e-001
+data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.86335e-001
+data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001
+data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001
+data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001
+ASM_SIZE_DIRECTIVE(pow_Tt)
+
+
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+//
+// Layout: 16 entries of 16 bytes each, written as two data8 words per
+// entry: the 64-bit significand first, then the word 0x00003FFF
+// (presumably the sign/exponent field with the extended-format bias,
+// i.e. a scale of 2^0 -- confirm against the ldfe memory format).
+// powf takes index_1 from the low 4 bits of N (and 0x0f), scales it by
+// 16 with shladd, and loads the selected entry with ldfe into POW_T1.
+pow_tbl1:
+ASM_TYPE_DIRECTIVE(pow_tbl1,@object)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+ASM_SIZE_DIRECTIVE(pow_tbl1)
+
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+//
+// Layout: 8 entries of 16 bytes each, written as two data8 words per
+// entry: the 64-bit significand first, then the word 0x00003FFF
+// (presumably the sign/exponent field with the extended-format bias,
+// i.e. a scale of 2^0 -- confirm against the ldfe memory format).
+// powf masks N with 0x70, so bits 4..6 of N already form the byte
+// offset index_2*16; that offset is added directly to the table base
+// and the selected entry is loaded with ldfe into POW_T2.
+pow_tbl2:
+ASM_TYPE_DIRECTIVE(pow_tbl2,@object)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+ASM_SIZE_DIRECTIVE(pow_tbl2)
+
+.global powf
+
+.section .text
+.proc powf
+.align 32
+
+powf:
+
+{ .mfi
+ alloc r32=ar.pfs,1,35,4,0
+ fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
+ mov pow_GR_17ones = 0x1FFFF
+}
+{ .mfi
+(p0) addl pow_AD_P = @ltoff(pow_table_P), gp
+ fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
+ nop.i 999
+;;
+}
+
+
+// Get exponent of x. Will be used to calculate K.
+{ .mfi
+ getf.exp pow_GR_signexp_X = f8
+ frcpa.s1 POW_B, p6 = f1,f8
+ nop.i 999
+}
+{ .mfi
+ ld8 pow_AD_P = [pow_AD_P]
+ fma.s1 POW_NORM_X = f8,f1,f0
+ mov pow_GR_FFF7 = 0xFFF7
+}
+;;
+
+
+
+// Get significand of x. Will be used to get index to fetch T, Tt.
+// p13 = TRUE ==> X is unorm
+// DOUBLE 0x10033 exponent limit at which y is an integer
+// SINGLE 0x10016
+{ .mfi
+ getf.sig pow_GR_sig_X = f8
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ addl pow_GR_10033 = 0x10033, r0
+}
+{ .mfi
+ mov pow_GR_16ones = 0xFFFF
+ fma.s1 POW_NORM_Y = f9,f1,f0
+ nop.i 999
+}
+;;
+
+
+// p14 = TRUE ==> X is ZERO
+{ .mfi
+ adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P
+ fclass.m p14,p15 = f8, 0x07
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+}
+{ .mfi
+ adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe POW_P5 = [pow_AD_P], 16
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ shl pow_GR_offset = pow_GR_sig_X, 1
+}
+{ .mib
+ ldfe POW_P4 = [pow_AD_Q], 16
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+(p13) br.cond.spnt L(POW_X_DENORM)
+}
+;;
+
+
+// Continue normal and denormal paths here
+L(POW_COMMON):
+// p11 = TRUE ==> Y is a NAN
+{ .mfi
+ ldfe POW_P3 = [pow_AD_P], 16
+ fclass.m.unc p11,p0 = f9, 0xc3
+ shr.u pow_GR_offset = pow_GR_offset,56
+}
+{ .mfi
+ ldfe POW_P2 = [pow_AD_Q], 16
+ nop.f 999
+ nop.i 999
+}
+;;
+
+
+
+// Compute xsq to decide later if |x|=1
+// p11 = TRUE ==> Y is a NaN
+{ .mfi
+ setf.sig POW_int_K = pow_GR_true_exp_X
+(p15) fms.s1 POW_r = POW_B, POW_NORM_X,f1
+ shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+}
+{ .mfi
+ nop.m 999
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
+ nop.i 999
+}
+;;
+
+
+
+// p12 = TRUE ==> X is ZERO and Y is ZERO
+{ .mfi
+ ldfe POW_P1 = [pow_AD_P], 16
+(p14) fclass.m.unc p12,p0 = f9, 0x07
+ nop.i 999
+}
+{ .mfb
+ ldfe POW_P0 = [pow_AD_Q], 16
+ fma.s1 POW_xsq = POW_NORM_X, POW_NORM_X, f0
+(p11) br.cond.spnt L(POW_Y_NAN)
+}
+;;
+
+
+.pred.rel "mutex",p8,p9
+// Get exponent of |x|-1 to use in comparison to 2^-8
+{ .mmf
+(p8) getf.exp pow_GR_signexp_Xm1 = POW_Xp1
+(p9) getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+}
+;;
+
+
+// p11 = TRUE ==> X is a NAN
+{ .mfi
+ ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
+ fclass.m.unc p11,p0 = f8, 0xc3
+ nop.i 999
+}
+{ .mib
+ ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
+ nop.i 999
+(p12) br.cond.spnt L(POW_X_0_Y_0)
+}
+;;
+
+
+// p14 = TRUE ==> X is zero
+// p15 = TRUE ==> X is zero AND Y is negative
+// p10 = TRUE ==> X is zero AND Y is >= zero
+{ .mfi
+ ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
+(p14) fcmp.lt.unc.s1 p15, p10 = f9,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ nop.f 999
+ and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
+}
+;;
+
+
+// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
+// p12 = TRUE ==> X is a NAN and Y is a zero
+// p13 = TRUE ==> X is a NAN and Y is anything else
+{ .mfi
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+(p11) fclass.m.unc p12,p13 = f9, 0x07
+ cmp.lt.unc p6,p7 = pow_GR_exp_Xm1, pow_GR_FFF7
+}
+{ .mfi
+ ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
+ fma.s1 POW_rsq = POW_r, POW_r,f0
+ nop.i 999
+;;
+}
+
+// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
+{ .mfi
+ ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
+(p6) fma.s1 POW_r = POW_r1, f1, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
+(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
+(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
+ andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
+(p12) br.cond.spnt L(POW_X_NAN_Y_0)
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_K = POW_int_K
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p13) fma.s f8 = f8,f1,f0
+(p13) br.ret.spnt b0 // Exit if x nan, y anything but zero
+}
+;;
+
+// p10 = TRUE ==> X is zero AND Y is positive
+// p8 = TRUE ==> X is zero AND Y is outside integer range (treat as even int)
+// return +0
+// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
+{ .mfi
+(p10) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033
+(p6) fmerge.s POW_delta = f0,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 POW_G = f0,f0,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y
+ fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe POW_log2_by_128_lo = [pow_AD_P], 16
+(p6) fma.s1 POW_v2 = POW_P1, POW_r1, POW_P0
+ nop.i 999
+}
+{ .mfi
+ ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
+(p7) fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_float_int_Y = POW_int_Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
+ adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+ adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
+ nop.i 999
+}
+;;
+
+// p11 = TRUE ==> X is NEGATIVE
+// p8 = TRUE ==> X is zero AND Y is outside integer range (treat as even int)
+// return +0
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x1a
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p8) fma.s f8 = f0,f0,f0
+(p8) br.ret.spnt b0
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
+ nop.i 999
+}
+;;
+
+
+// p11 = TRUE ==> X is NEGATIVE
+// p12 = TRUE ==> X is NEGATIVE AND Y already int
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
+(p11) cmp.ge.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
+ nop.i 999
+}
+;;
+
+// p9 = TRUE ==> X is zero AND Y is within integer range (may not be integer)
+// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
+// p7 = TRUE ==> X is zero AND Y is NOT an integer, return +0
+{ .mfi
+ nop.m 999
+(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Gpr = POW_G, f1, POW_r
+ nop.i 999
+}
+;;
+
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
+ nop.i 999
+}
+;;
+
+
+// If x=0 and y>0, test y and flag denormal
+// p6 = TRUE ==> X is zero AND Y is an integer (may be even or odd)
+// p8 = TRUE ==> X is zero AND Y is an odd integer
+// p9 = TRUE ==> X is zero AND Y is an even integer
+{ .mfi
+ nop.m 999
+(p10) fcmp.eq.s0 p15,p0 = f9,f0
+(p6) tbit.nz.unc p8,p9 = pow_GR_sig_int_Y,0
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ nop.i 999
+}
+;;
+
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W1 = POW_Z1, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.s f8 = f0,f0,f0 // Result +0 if x zero and y not integer
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
+(p8) br.ret.spnt b0 // Exit if x zero and y odd integer
+}
+;;
+
+// By subtracting RSHF we get rounded integer POW_N2float
+// p15 = TRUE ==> X_0_Y_NEG
+{ .mfi
+ nop.m 999
+ fms.s1 POW_N2float = POW_W2, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
+(p15) br.cond.spnt L(POW_X_0_Y_NEG)
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
+(p7) br.ret.spnt b0 // Exit if x zero and y not an integer
+}
+;;
+
+
+
+// Extract rounded integer from rightmost significand of POW_W2
+// By subtracting RSHF we get rounded integer POW_N1float
+{ .mfi
+ getf.sig pow_GR_int_W2 = POW_W2
+ fms.s1 POW_N1float = POW_W1, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
+ nop.i 999
+}
+;;
+
+
+
+
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+// p10 = TRUE ==> X is NEG and Y is an int
+// p12 = TRUE ==> X is NEG and Y is not an int
+{ .mfi
+ nop.m 999
+(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.s f8 = f0,f0,f0 // Result +0 if x zero and y even integer
+(p9) br.ret.spnt b0 // Exit if x zero and y even integer
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
+ nop.i 999
+}
+;;
+
+// Extract rounded integer from rightmost significand of POW_W1
+// Test if x inf
+{ .mfi
+ getf.sig pow_GR_int_W1 = POW_W1
+ fclass.m.unc p15,p0 = POW_NORM_X, 0x23
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1
+(p12) br.cond.spnt L(POW_X_NEG_Y_NONINT) // Branch if x neg, y not integer
+}
+;;
+
+// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
+{ .mfi
+ getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
+ fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
+(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+}
+;;
+
+
+{ .mfi
+ add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
+ fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1
+(p15) br.cond.spnt L(POW_X_INF)
+}
+;;
+
+
+// Test x and y and flag denormal
+{ .mfi
+ and pow_GR_index1 = 0x0f, pow_GR_int_N
+ fcmp.eq.s0 p15,p0 = f8,f9
+ shr r2 = pow_GR_int_N, 7
+}
+{ .mfi
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+ nop.f 999
+ and pow_GR_index2 = 0x70, pow_GR_int_N
+}
+;;
+
+
+
+{ .mfi
+ shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
+ sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
+}
+{ .mfi
+ addl pow_int_GR_M = 0xFFFF, r2
+ fma.s1 POW_e12 = POW_e1,f1,POW_e2
+ add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+}
+;;
+
+
+{ .mmi
+ ldfe POW_T1 = [pow_AD_T1],16
+ setf.exp POW_2M = pow_int_GR_M
+ andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+
+{ .mfb
+ ldfe POW_T2 = [pow_AD_T2],16
+ fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
+}
+;;
+
+
+// double: p8 TRUE ==> |Y(G + r)| >= 10
+// single: p8 TRUE ==> |Y(G + r)| >= 7
+
+// double
+// -2^10 -2^9 2^9 2^10
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+// single
+// -2^7 -2^6 2^6 2^7
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+
+
+{ .mfi
+(p0) cmp.le.unc p8,p9 = 7, pow_GR_true_exp_Y_Gpr
+ fma.s1 POW_s = POW_s1, f1, POW_s2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f12 = POW_f1, POW_f2,f0
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.f 999
+(p9) cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr
+}
+;;
+
+
+
+{ .mfb
+ nop.m 999
+ fma.s1 POW_e123 = POW_e12, f1, POW_e3
+(p8) br.cond.spnt L(POW_OVER_UNDER_X_NOT_INF)
+}
+;;
+
+
+{ .mmf
+ fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_1ps = f1,f1,POW_s
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f3 = POW_e123,f1,f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f123 = POW_f12, POW_f3, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_2M, POW_T1T2, f0
+ nop.i 999
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+(p12) fmerge.s POW_f123 = f8,POW_f123 // if x neg, y odd int
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+// fma.s1 POW_es = POW_ssq, POW_v3, POW_v2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_A, POW_f123, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+// fma.s1 POW_es = POW_es, POW_ssq, POW_1ps
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_A, POW_es,f0
+ nop.i 999
+}
+;;
+
+
+
+{ .mfb
+ nop.m 999
+(p10) fma.s f8 = POW_A, POW_q, POW_A
+(p10) br.ret.sptk b0
+}
+;;
+
+
+
+
+
+// POSSIBLE_OVER_UNDER
+// p6 = TRUE ==> Y negative
+
+{ .mfi
+ nop.m 999
+ fmerge.s POW_abs_A = f0, POW_A
+ cmp.eq.unc p0,p6 = pow_GR_sign_Y, r0
+}
+;;
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(POW_POSSIBLE_UNDER)
+}
+;;
+
+// POSSIBLE_OVER
+// We got an answer.
+// overflow is a possibility, not a certainty
+
+
+// We define an overflow when the answer with
+// WRE set
+// user-defined rounding mode
+
+// double
+// Largest double is 7FE (biased double)
+// 7FE - 3FF + FFFF = 103FE
+// Create + largest_double_plus_ulp
+// Create - largest_double_plus_ulp
+// Calculate answer with WRE set.
+
+// single
+// Largest single is FE (biased single)
+// FE - 7F + FFFF = 1007E
+// Create + largest_single_plus_ulp
+// Create - largest_single_plus_ulp
+// Calculate answer with WRE set.
+
+// Cases when answer is ldn+1 are as follows:
+// ldn ldn+1
+// --+----------|----------+------------
+// |
+// +inf +inf -inf
+// RN RN
+// RZ
+
+
+// Put in s2 (td set, wre set)
+{ .mfi
+ mov pow_GR_gt_ln = 0x1007f
+ fsetc.s2 0x7F,0x42
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ setf.exp POW_gt_pln = pow_GR_gt_ln
+ fma.s.s2 POW_wre_urm_f8 = POW_abs_A, POW_q, POW_abs_A
+ nop.i 999 ;;
+}
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+
+// p7 = TRUE ==> yes, we have an overflow
+{ .mfi
+ nop.m 999
+ fcmp.ge.unc.s1 p7, p0 = POW_wre_urm_f8, POW_gt_pln
+ nop.i 999
+}
+;;
+
+
+
+{ .mfb
+(p7) mov pow_GR_tag = 30
+ fma.s f8 = POW_A, POW_q, POW_A
+(p7) br.cond.spnt __libm_error_region
+}
+{ .mfb
+ nop.m 999
+ nop.f 999
+(p0) br.ret.sptk b0
+}
+;;
+
+
+L(POW_POSSIBLE_UNDER):
+// We got an answer. input was < -2^9 but > -2^10 (double)
+// We got an answer. input was < -2^6 but > -2^7 (float)
+// underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+
+// Put in s2 (td set, ftz set)
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x41
+ nop.i 999
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+ fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_q, POW_A
+ nop.i 999
+}
+;;
+
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+
+// p7 = TRUE ==> yes, we have an underflow
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p7, p0 = POW_ftz_urm_f8, f0
+ nop.i 999
+}
+;;
+
+
+
+
+{ .mfb
+(p7) mov pow_GR_tag = 31
+ fma.s f8 = POW_A, POW_q, POW_A
+(p7) br.cond.spnt __libm_error_region
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.ret.sptk b0
+}
+;;
+
+
+L(POW_X_DENORM):
+// Here if x unorm. Use the NORM_X for getf instructions, and then go back
+// to the normal path
+{ .mfi
+ getf.exp pow_GR_signexp_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.sig pow_GR_sig_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ nop.f 999
+}
+;;
+
+{ .mib
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ shl pow_GR_offset = pow_GR_sig_X, 1
+ br.cond.sptk L(POW_COMMON)
+}
+;;
+
+
+L(POW_X_0_Y_0):
+// When X is +-0 and Y is +-0, IEEE returns 1.0
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 32
+ fma.s f8 = f1,f1,f0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_INF):
+// When X is +-inf and Y is +-, IEEE returns
+
+// overflow
+// X +inf Y +inf +inf
+// X -inf Y +inf +inf
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf <== (-inf)^0.5 = +inf !!
+// X -inf Y >0, odd integer -inf
+
+// underflow
+// X +inf Y -inf +0
+// X -inf Y -inf +0
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X + inf Y=+0 +1
+// X + inf Y=-0 +1
+// X - inf Y=+0 +1
+// X - inf Y=-0 +1
+
+// p13 == Y negative
+// p14 == Y positive
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+
+
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// p13 == (Y negative)
+// return (sign_of_x)0
+// p14 == (Y positive)
+// return (sign_of_x)inf
+// pxx even
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+
+// pxx == Y is not an integer
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+//
+
+// If x=inf, test y and flag denormal
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p10,p11 = f9,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.lt p13,p14 = POW_NORM_Y,f0
+ cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fclass.m p12,p0 = f9, 0x23
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fclass.m p15,p0 = f9, 0x07 //@zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p15) fmerge.s f8 = f1,f1
+(p15) br.ret.spnt b0
+}
+;;
+
+
+{ .mfi
+(p13) mov pow_GR_tag = 31
+(p14) frcpa.s1 f8,p10 = f1,f0
+ nop.i 999
+}
+{ .mfb
+(p14) mov pow_GR_tag = 30
+(p13) fma.s1 f8 = f0,f0,f0
+(p12) br.ret.spnt b0
+}
+;;
+
+
+
+{ .mfb
+ nop.m 999
+(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y
+ nop.b 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fmerge.s f8 = POW_NORM_X,f8
+ br.ret.sptk b0
+}
+;;
+
+
+
+L(POW_X_0_Y_NEG):
+// When X is +-0 and Y is negative, IEEE returns
+// X Y answer
+// +0 -odd int +inf
+// -0 -odd int -inf
+
+// +0 !-odd int +inf
+// -0 !-odd int +inf
+
+
+// p6 == Y is a floating point number outside the integer.
+// Hence it is an integer and is even.
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// return (sign_of_x)inf
+// p12 even
+// return +inf
+// p10 == Y is not an integer
+// return +inf
+//
+//
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ cmp.gt.unc p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+;;
+
+
+{ .mfi
+ mov pow_GR_tag = 33
+(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p6) frcpa.s0 f8,p13 = f1, f0
+(p6) br.cond.sptk __libm_error_region
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p10) frcpa.s0 f8,p13 = f1, f0
+(p10) br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+{ .mib
+ nop.m 999
+(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
+ nop.b 999
+}
+;;
+
+
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s0 f8,p13 = f1,f0
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) frcpa f8,p13 = f1,f8
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_NEG_Y_NONINT):
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 34
+ frcpa f8,p6 = f0,f0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_NAN_Y_0):
+// When X is a NAN and Y is zero, IEEE returns 1.
+// We call error support with this value.
+
+{ .mfi
+ nop.m 0
+ fma.s.s0 f10 = f8,f1,f0
+ nop.i 0
+}
+{ .mfb
+ mov pow_GR_tag = 35
+ fma.s.s0 f8 = f0,f0,f1
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+L(POW_OVER_UNDER_X_NOT_INF):
+
+// p8 is TRUE for overflow
+// p9 is TRUE for underflow
+
+// if y is infinity, we should not over/underflow
+
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s1 p14, p13 = POW_xsq,f1
+ cmp.eq.unc p8,p9 = pow_GR_sign_Y_Gpr, r0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fclass.m.unc p15, p0 = f9, 0x23
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fclass.m.unc p11,p0 = f9, 0x23
+ nop.i 999
+}
+;;
+
+// p15 = TRUE if |x|=1, y=inf, return +1
+{ .mfb
+ nop.m 999
+(p15) fma.s f8 = f1,f1,f0
+(p15) br.ret.spnt b0
+}
+;;
+
+.pred.rel "mutex",p8,p9
+{ .mfb
+(p8) setf.exp f8 = pow_GR_17ones
+(p9) fmerge.s f8 = f0,f0
+(p11) br.ret.sptk b0
+}
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.cond.sptk L(POW_OVER_UNDER_ERROR)
+}
+;;
+
+L(POW_Y_NAN):
+
+// If x = +1 then result is +1, else result is quiet Y
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fcmp.eq.s0 p6,p0 = f9,f1 // Set invalid, even if x=+1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fma.s f8 = f1,f1,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.s f8 = f9,f8,f0
+ br.ret.sptk b0
+}
+;;
+
+
+L(POW_OVER_UNDER_ERROR):
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = POW_NORM_X,POW_NORM_X
+ nop.i 999
+}
+{ .mfi
+ sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
+ nop.f 999
+ mov pow_GR_one = 0x1
+}
+;;
+
+// overflow
+{ .mmb
+(p8) mov pow_GR_tag = 30
+(p8) setf.exp f11 = pow_GR_17ones_m1
+ nop.b 999
+}
+;;
+
+
+// underflow
+{ .mmi
+(p9) mov pow_GR_tag = 31
+(p9) setf.exp f11 = pow_GR_one
+ nop.i 999
+}
+;;
+
+
+// p12 x is negative and y is an odd integer
+
+
+{ .mfi
+ nop.m 999
+ fma.s f8 = f11, f11, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fmerge.ns f8 = f8, f8
+ nop.i 999
+}
+;;
+
+
+.endp powf
+ASM_SIZE_DIRECTIVE(powf)
+
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+
+
+.proc __libm_error_region
+__libm_error_region:
+// Error exit shared by the powf paths: entered by branch with pow_GR_tag set and the provisional answer in f8; spills y, x and that answer into a new 64-byte frame, calls __libm_error_support, and returns whatever is then in the result slot.
+// Answer is inf for overflow and 0 for underflow. NOTE(review): pow_GR_tag is not referenced below -- presumably it aliases an argument register read by the callee; confirm against the register definitions.
+.prologue
+// (1) Take the y-slot address from the incoming sp, save ar.pfs and gp, and open the 64-byte frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: (incoming sp)-32, i.e. (new sp)+32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Spill y as a single and form the x-slot address; b0 is saved because the call below overwrites it
+{ .mmi
+ stfs [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack; post-increment leaves the pointer at (new sp)+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Spill x and the provisional result, hand the three slot addresses to the handler
+{ .mib
+ stfs [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address: (new sp)+48
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at the y slot, (new sp)+32
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result-slot address from sp after the call
+};;
+
+// (4) Reload the result into f8, pop the frame, restore b0/gp/ar.pfs and return to powf's caller
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S
new file mode 100644
index 0000000..3b99044
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_powl.S
@@ -0,0 +1,3437 @@
+.file "powl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// Function: powl(x,y), where
+// y
+// powl(x,y) = x , for double extended precision x and y values
+//
+// *********************************************************************
+//
+// History:
+// 2/02/00 (Hand Optimized)
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 1/22/01 Corrected results for powl(1,inf), powl(1,nan), and
+// powl(snan,0) to be 1 per C99, not nan. Fixed many flag settings.
+// 2/06/01 Call __libm_error support if over/underflow when y=2.
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers:
+// f8 (Input and Return Value)
+// f9-f15,f32-f63,f99
+//
+// General Purpose Registers:
+// Locals r32 - r61
+// Parameters to __libm_error_support r62,r63,r64,r65
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// Special Cases and IEEE special conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions raised when appropriate for pow
+// Underflow exceptions raised when appropriate for pow
+// (Error Handling Routine called for overflow and Underflow)
+// Inexact raised when appropriate by algorithm
+//
+// 1. (anything) ** NatVal or (NatVal) ** anything is NatVal
+// 2. X or Y unsupported or sNaN is qNaN/Invalid
+// 3. (anything) ** 0 is 1
+// 4. (anything) ** 1 is itself
+// 5. (anything except 1) ** qNAN is qNAN
+// 6. qNAN ** (anything except 0) is qNAN
+// 7. +-(|x| > 1) ** +INF is +INF
+// 8. +-(|x| > 1) ** -INF is +0
+// 9. +-(|x| < 1) ** +INF is +0
+// 10. +-(|x| < 1) ** -INF is +INF
+// 11. +-1 ** +-INF is +1
+// 12. +0 ** (+anything except 0, NAN) is +0
+// 13. -0 ** (+anything except 0, NAN, odd integer) is +0
+// 14. +0 ** (-anything except 0, NAN) is +INF/div_0
+// 15. -0 ** (-anything except 0, NAN, odd integer) is +INF/div_0
+// 16. -0 ** (odd integer) = -( +0 ** (odd integer) )
+// 17. +INF ** (+anything except 0,NAN) is +INF
+// 18. +INF ** (-anything except 0,NAN) is +0
+// 19. -INF ** (anything except NAN) = -0 ** (-anything)
+// 20. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
+// 21. (-anything except 0 and inf) ** (non-integer) is qNAN/Invalid
+// 22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
+// generate denorm/unorm fault except if invalid or div_0 raised.
+//
+// *********************************************************************
+//
+// Algorithm
+// =========
+//
+// Special Cases
+//
+// If Y = 2, return X*X.
+// If Y = 0.5, return sqrt(X).
+//
+// Compute log(X) to extra precision.
+//
+// ker_log_80( X, logX_hi, logX_lo, Safe );
+//
+// ...logX_hi + logX_lo approximates log(X) to roughly 80
+// ...significant bits of accuracy.
+//
+// Compute Y*log(X) to extra precision.
+//
+// P_hi := Y * logX_hi
+// P_lo := Y * logX_hi - P_hi ...using FMA
+// P_lo := Y * logX_lo + P_lo ...using FMA
+//
+// Compute exp(P_hi + P_lo)
+//
+// Flag := 2;
+// Expo_Range := 2; (assuming double-extended power function)
+// ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
+// Z_hi, Z_lo, scale, Safe )
+//
+// scale := sgn * scale
+//
+// If (Safe) then ...result will not over/underflow
+// return scale*Z_hi + (scale*Z_lo)
+// quickly
+// Else
+// take necessary precaution in computing
+// scale*Z_hi + (scale*Z_lo)
+// to set possible exceptions correctly.
+// End If
+//
+// Case_Y_Special
+//
+// ...Follow the order of the case checks
+//
+// If Y is +-0, return +1 without raising any exception.
+// If Y is +1, return X without raising any exception.
+// If Y is qNaN, return Y without exception.
+// If X is qNaN, return X without exception.
+//
+// At this point, X is real and Y is +-inf.
+// Thus |X| can only be 1, strictly bigger than 1, or
+// strictly less than 1.
+//
+// If |X| < 1, then
+// return ( Y == +inf? +0 : +inf )
+// elseif |X| > 1, then
+// return ( Y == +inf? +inf : +0 )
+// else
+// return +1 ...|X| = 1, see special case 11 (was Case_Invalid before the 1/22/01 correction)
+//
+// Case_X_Special
+//
+// ...Follow the order of the case checks
+// ...Note that Y is real, finite, non-zero, and not +1.
+//
+// If X is qNaN, return X without exception.
+//
+// If X is +-0,
+// return ( Y > 0 ? +0 : +inf )
+//
+// If X is +inf
+// return ( Y > 0 ? +inf : +0 )
+//
+// If X is -inf
+// return -0 ** -Y
+// return ( Y > 0 ? +inf : +0 )
+//
+// Case_Invalid
+//
+// Return 0 * inf to generate a quiet NaN together
+// with an invalid exception.
+//
+// Implementation
+// ==============
+//
+// We describe the quick branch since this part is important
+// in reaching the normal case efficiently.
+//
+// STAGE 1
+// -------
+// This stage contains two threads.
+//
+// Stage1.Thread1
+//
+// fclass.m X_excep, X_ok = X, (NatVal or s/qNaN) or
+// +-0, +-infinity
+//
+// fclass.nm X_unsupp, X_supp = X, (NatVal or s/qNaN) or
+// +-(0, unnorm, norm, infinity)
+//
+// X_norm := fnorm( X ) with traps disabled
+//
+// If (X_excep) goto Filtering (Step 2)
+// If (X_unsupp) goto Filtering (Step 2)
+//
+// Stage1.Thread2
+// ..............
+//
+// fclass.m Y_excep, Y_ok = Y, (NatVal or s/qNaN) or
+// +-0, +-infinity
+//
+// fclass.nm Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
+// +-(0, unnorm, norm, infinity)
+//
+// Y_norm := fnorm( Y ) with traps disabled
+//
+// If (Y_excep) goto Filtering (Step 2)
+// If (Y_unsupp) goto Filtering (Step 2)
+//
+//
+// STAGE 2
+// -------
+// This stage contains two threads.
+//
+// Stage2.Thread1
+// ..............
+//
+// Set X_lt_0 if X < 0 (using fcmp)
+// sgn := +1.0
+// If (X_lt_0) goto Filtering (Step 2)
+//
+// Stage2.Thread2
+// ..............
+//
+// Set Y_is_1 if Y = +1 (using fcmp)
+// If (Y_is_1) goto Filtering (Step 2)
+//
+// STAGE 3
+// -------
+// This stage contains two threads.
+//
+//
+// Stage3.Thread1
+// ..............
+//
+// X := fnorm(X) in prevailing traps
+//
+//
+// Stage3.Thread2
+// ..............
+//
+// Y := fnorm(Y) in prevailing traps
+//
+// STAGE 4
+// -------
+//
+// Go to Case_Normal.
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+// Inv_L, L_hi, L_lo: three double-extended constants, one per 16-byte row (significand low word, significand high word, sign/exponent word, zero pad)
+.align 64
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 // Inv_L: decodes to ~2^12/ln(2)
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi: leading bits of ln(2)/2^12 (low significand word is zero)
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo: remainder, so that L_hi + L_lo ~ ln(2)/2^12
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64
+Constants_exp_64_Exponents: // Twelve 64-bit signed integers stored as (low word, high word) pairs, two per row. NOTE(review): the first three rows look like max/min exponent bounds for single, double and double-extended -- confirm against the code that indexes this table.
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF // 126, -125
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF // 1022, -1021
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF // 16382, -16381
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF // 16382, -16381 (same as the previous row)
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF // -30, -60
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF // -70, -70
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64
+Constants_exp_64_A:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+// Reversed: highest-order coefficient first. Three double-extended values (significand low word, significand high word, sign/exponent word, pad) that decode to ~1/24, ~1/6 and ~1/2.
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64
+Constants_exp_64_P:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+// Reversed: highest-order coefficient first. Six double-extended values (significand low word, significand high word, sign/exponent word, pad) that decode to ~1/5040, ~1/720, ~1/120, ~1/24, ~1/6 and ~1/2.
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64
+Constants_exp_64_T1: // 64 IEEE single-precision words, four per row; entry j decodes to 2^(j/64) for j = 0..63 (entry 0 is 1.0, entry 32 is sqrt(2))
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64
+Constants_exp_64_T2: // 64 IEEE single-precision words, four per row; entry j decodes to 2^(j/4096) for j = 0..63 (entry 0 is 1.0)
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+
+.align 64
+Constants_exp_64_W1: // 64 eight-byte entries, two per row, each a (low word, high word) pair; entry 0 is zero. NOTE(review): read as IEEE doubles they are all below about 6e-8 in magnitude, which would fit rounding residuals of the single-precision Constants_exp_64_T1 entries -- confirm against the code that loads them.
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64
+Constants_exp_64_W2: // 64 eight-byte entries, two per row, each a (low word, high word) pair; entry 0 is zero. NOTE(review): read as IEEE doubles they are all below about 6e-8 in magnitude, which would fit rounding residuals of the single-precision Constants_exp_64_T2 entries -- confirm against the code that loads them.
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+
+.align 64
+Constants_log_80_P:
+ASM_TYPE_DIRECTIVE(Constants_log_80_P,@object)
+// 1/2, P_8, P_7, ..., P_1: nine double-extended values, one per row (significand low word, significand high word, sign/exponent word, pad); the final row carries no pad word. P_8..P_1 alternate in sign, starting negative.
+data4 0x00000000, 0x80000000, 0x00003FFE, 0x00000000
+data4 0x3B1042BC, 0xCCCE8B88, 0x0000BFFB, 0x00000000
+data4 0xCADC2149, 0xE38997B7, 0x00003FFB, 0x00000000
+data4 0xB1ACB090, 0xFFFFFFFE, 0x0000BFFB, 0x00000000
+data4 0x06481C81, 0x92492498, 0x00003FFC, 0x00000000
+data4 0xAAAAB0EF, 0xAAAAAAAA, 0x0000BFFC, 0x00000000
+data4 0xCCC91416, 0xCCCCCCCC, 0x00003FFC, 0x00000000
+data4 0x00000000, 0x80000000, 0x0000BFFD, 0x00000000
+data4 0xAAAAAAAB, 0xAAAAAAAA, 0x00003FFD
+ASM_SIZE_DIRECTIVE(Constants_log_80_P)
+
+.align 64
+Constants_log_80_Q:
+ASM_TYPE_DIRECTIVE(Constants_log_80_Q,@object)
+// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1: eight double-extended values, one per row (significand low word, significand high word, sign/exponent word, pad). log2_hi has a zero low significand word and log2_lo is a small negative correction to it.
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
+data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
+data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
+data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
+data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
+data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_log_80_Q)
+
+.align 64
+Constants_log_80_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h1,@object)
+// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 stored in IEEE double-extended layout. 16 entries of two rows each: {Z1, G1, H1, pad} then {h1 significand low word, significand high word, sign/exponent word, pad}.
+data4 0x00008000,0x3F800000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
+data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
+data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
+data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
+data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
+data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
+data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
+data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
+data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
+data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
+data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
+data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
+data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
+data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
+data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
+data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
+data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
+data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
+data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
+data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
+data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h1)
+
+.align 64
+Constants_log_80_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h2,@object)
+// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 stored in IEEE double-extended layout. 16 entries of two rows each: {Z2, G2, H2, pad} then {h2 significand low word, significand high word, sign/exponent word, pad}.
+data4 0x00008000,0x3F800000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
+data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
+data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
+data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
+data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
+data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
+data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
+data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
+data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
+data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
+data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
+data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
+data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
+data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
+data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
+data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
+data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h2)
+
+.align 64
+Constants_log_80_h3_G_H:
+ASM_TYPE_DIRECTIVE(Constants_log_80_h3_G_H,@object)
+// Third-stage lookup table for the extended-precision log kernel in powl.
+// Indexed by Index3 = extr.u(X_2, 1, 5), i.e. 32 entries per sub-table.
+// Two sub-tables are packed back to back:
+//   offset 0x000: 32 rows of 16 bytes, addressed as base + Index3*16.
+//                 Bytes 0-11 hold h3 (significand low word, significand
+//                 high word, sign/exponent word), read with ldfe; bytes
+//                 12-15 hold G3, read with ldfs after a post-increment of 12.
+//   offset 0x200: 32 H3 singles, addressed as base + 0x200 + Index3*4.
+// h3 IEEE double extended, H3 and G3 IEEE single
+data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
+data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
+data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
+data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
+data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
+data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
+data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
+data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
+data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
+data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
+data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
+data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
+data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
+data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
+data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
+data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
+data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
+data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
+data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
+data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
+data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
+data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
+data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
+data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
+data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
+data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
+data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
+data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
+data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
+data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
+data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
+data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
+data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
+data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
+data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
+data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
+data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
+data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
+data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
+data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
+ASM_SIZE_DIRECTIVE(Constants_log_80_h3_G_H)
+
+.align 64
+Constant_half:
+ASM_TYPE_DIRECTIVE(Constant_half,@object)
+// The constant 0.5 in 80-bit extended format: significand
+// 0x8000000000000000 (low word first) and sign/exponent word 0x3FFE.
+// powl loads it into FR_Half with ldfe to test for y == 1/2.
+data4 0x00000000,0x80000000,0x00003FFE
+ASM_SIZE_DIRECTIVE(Constant_half)
+
+// General-register aliases used by powl.
+// powl's alloc requests 0 inputs, 30 locals and 4 outputs, so r32-r61 are
+// locals and r62-r65 (the GR_Parameter_* names) are the output registers.
+// Several names are mapped onto the same register (grouped together below).
+// NOTE(review): this presumably relies on the live ranges of the sharing
+// names not overlapping - confirm before reordering code that uses them.
+// r46 has no alias in this list.
+GR_Expo_Range = r32
+GR_Flag = r33
+GR_Table_Ptr = r34
+
+GR_Table_Ptr1 = r35
+GR_BIAS = r35
+
+GR_Index1 = r36
+GR_sign_mask = r36
+
+GR_Index2 = r37
+GR_Expo_X = r37
+
+GR_signif_Z = r38
+GR_M = r38
+
+GR_X_0 = r39
+GR_Mask = r39
+
+GR_X_1 = r40
+GR_W1_ptr = r40
+
+GR_W2_ptr = r41
+GR_X_2 = r41
+
+GR_Z_1 = r42
+GR_M2 = r42
+
+GR_M1 = r43
+GR_Z_2 = r43
+
+GR_N = r44
+GR_k = r44
+
+GR_Big_Pos_Exp = r45
+
+
+GR_BIAS_p_k = r47
+GR_BIASed_exp_y = r47
+
+GR_Big_Neg_Exp = r48
+GR_Index3 = r48
+GR_temp = r48
+
+GR_vsm_expo = r49
+GR_y_sign = r49
+
+GR_T1_ptr = r50
+GR_T2_ptr = r51
+GR_N_fix = r52
+GR_exp_y = r53
+GR_signif_y = r54
+GR_exp_and_sign_y = r55
+GR_low_order_bit = r56
+GR_get_exp_mask = r57
+GR_exponent_zero = r58
+
+// ** Registers for unwind support
+
+GR_SAVE_PFS = r59
+GR_SAVE_B0 = r60
+GR_SAVE_GP = r61
+GR_Parameter_X = r62
+GR_Parameter_Y = r63
+GR_Parameter_RESULT = r64
+GR_Parameter_TAG = r65
+
+// Floating-point register aliases used by powl.
+// Inputs arrive in f8 (x) and f9 (y); the final result is built in f99.
+// Many names share one register, grouped together below.
+// NOTE(review): three names are assigned twice in this list:
+//   FR_X       = f8, later f10
+//   FR_float_N = f38, later f48
+//   FR_P_8     = f40, later f44
+// The visible powl code only works if the LATER assignment is in effect:
+// the exp stage multiplies FR_X by FR_L_Inv expecting P_hi (f10), the log
+// stage keeps H_1 in f38 while float_N is live, and the near-1 log path
+// loads L_hi into f40 before P_8 is consumed.  Which assignment the
+// assembler uses at each reference is an assumption - confirm, and consider
+// removing the earlier duplicates.
+FR_X = f8
+FR_Y = f9
+FR_RESULT = f99
+
+// **
+
+FR_Input_X = f8
+FR_Output = f8
+FR_Input_Y = f9
+
+FR_Neg = f10
+FR_P_hi = f10
+FR_X = f10
+
+FR_Half = f11
+FR_h_3 = f11
+FR_poly_hi = f11
+
+FR_Sgn = f12
+
+FR_Neg_X = f13
+FR_half_W = f13
+
+FR_X_cor = f14
+FR_P_lo = f14
+
+FR_W = f15
+
+FR_X_lo = f32
+
+FR_S = f33
+FR_W3 = f33
+
+FR_Y_hi = f34
+FR_logx_hi = f34
+
+FR_Z = f35
+FR_logx_lo = f35
+FR_GS_hi = f35
+FR_Y_lo = f35
+
+FR_r_cor = f36
+FR_Scale = f36
+
+FR_G_1 = f37
+FR_G = f37
+FR_Wsq = f37
+FR_L_Inv = f37
+FR_temp = f37
+
+FR_H_1 = f38
+FR_H = f38
+FR_W4 = f38
+FR_float_N = f38
+
+FR_h = f39
+FR_h_1 = f39
+FR_N = f39
+FR_P_7 = f39
+
+FR_G_2 = f40
+FR_P_8 = f40
+FR_L_hi = f40
+
+FR_H_2 = f41
+FR_L_lo = f41
+FR_A_1 = f41
+
+FR_h_2 = f42
+FR_P_6 = f42
+
+FR_abs_W = f43
+FR_W1 = f43
+
+FR_G_3 = f44
+FR_P_8 = f44
+FR_T1 = f44
+
+FR_log2_hi = f45
+FR_W2 = f45
+
+FR_GS_lo = f46
+FR_T2 = f46
+
+FR_W_1_p1 = f47
+FR_H_3 = f47
+
+FR_float_N = f48
+
+FR_P_4 = f49
+FR_A_2 = f49
+
+FR_Q_4 = f50
+FR_r4 = f50
+
+FR_Q_3 = f51
+FR_A_3 = f51
+
+FR_Q_2 = f52
+FR_P_2 = f52
+
+FR_Q_1 = f53
+FR_P_1 = f53
+FR_T = f53
+
+FR_Wp1 = f54
+FR_Q_5 = f54
+FR_P_3 = f54
+
+FR_Q_6 = f55
+
+FR_log2_lo = f56
+FR_Two = f56
+
+FR_Big = f57
+
+FR_neg_2_mK = f58
+FR_NBig = f58
+
+FR_r = f59
+
+FR_poly_lo = f60
+
+FR_poly = f61
+
+FR_P_5 = f62
+
+FR_rsq = f63
+
+FR_Result = f99
+FR_Result_small = f100
+FR_Result_big = f101
+
+.section .text
+.proc powl#
+.global powl#
+.align 64
+
+powl:
+{ .mfi
+alloc GR_Expo_Range = ar.pfs,0,30,4,0
+(p0) fclass.m.unc p7, p13 = FR_Input_Y, 0x1E7
+nop.i 0
+}
+{ .mfi
+(p0) getf.exp GR_exp_and_sign_y = FR_Input_Y
+//
+// Save State
+//
+(p0) fclass.m.unc p6, p12 = FR_Input_X, 0x1E7
+nop.i 0
+};;
+{ .mfi
+(p0) getf.sig GR_signif_y = FR_Input_Y
+(p0) fcmp.eq.unc.s1 p12, p13 = FR_Input_X, f1
+nop.i 0
+}
+{ .mfi
+ nop.m 999
+//
+// Check for y = 1
+// Identify EM unsupporteds.
+// Load FR_half = .5
+//
+(p0) fadd.s1 FR_Two = f1, f1
+//
+// Load 1/2 in GP register
+//
+nop.i 0
+}
+;;
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constant_half#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mlx
+(p0) ldfe FR_Half =[GR_Table_Ptr],0
+(p0) movl GR_get_exp_mask = 0x1FFFF ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p9, p15 = FR_Input_Y, 0x1FF
+//
+// Create FR_Two = 2
+// Get exp and significand of Y
+// Create Masks
+// sgn = 1
+//
+(p0) and GR_exp_y = GR_get_exp_mask,GR_exp_and_sign_y
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_exponent_zero = 0xFFFF ;;
+}
+{ .mfi
+ nop.m 999
+(p0) mov FR_Sgn = f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p10, p11 = FR_Input_Y, f1
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Identify NatVals, NaNs, Infs, and Zeros.
+// Load Half
+//
+(p0) fclass.nm.unc p8, p14 = FR_Input_X, 0x1FF
+//
+// Remove sign bit from exponent of y.
+// Check for x = 1
+//
+(p6) br.cond.spnt L(POWL_64_SPECIAL) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.spnt L(POWL_64_SPECIAL) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(POWL_64_UNSUPPORT) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt L(POWL_64_UNSUPPORT) ;;
+}
+{ .mfi
+(p0) cmp.lt.unc p9, p0 = GR_exp_y,GR_exponent_zero
+(p0) fcmp.lt.unc.s1 p6, p13 = FR_Input_X, f0
+//
+// Branch on Infs, Nans, Zeros, and Natvals
+// Check to see that exponent < 0
+//
+(p0) sub GR_exp_y = GR_exp_y,GR_exponent_zero
+}
+// x not zero, is y ==2?
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p7, p14 = FR_Input_Y, FR_Two
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p9) fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0
+(p7) br.cond.spnt L(POWL_64_SQUARE) ;; // Branch if x not zero and y=2
+}
+{ .mfi
+ nop.m 999
+(p6) fmerge.ns FR_Neg_X = FR_Input_X, FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f1
+//
+// For y = 1, compute result = x
+// For x = 1, compute 1
+// When Y is one return X and possible raise
+// denormal operand exception.
+// Remove exponent BIAS
+//
+(p6) shl GR_exp_and_sign_y= GR_signif_y,GR_exp_y ;;
+}
+{ .mfi
+(p9) or GR_exp_and_sign_y = 0xF,GR_signif_y
+(p12) fma.s0 FR_Result = FR_Input_Y, f0, f1
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+(p6) extr.u GR_exp_y = GR_exp_and_sign_y,63,1 ;;
+(p6) cmp.ne.unc p9, p0 = GR_exp_y, r0
+}
+{ .mii
+ nop.m 999
+//
+// Both predicates can be set.
+// Don't consider y's < 1.
+//
+(p6) shl GR_signif_y= GR_exp_and_sign_y,1 ;;
+//
+// Is shift off integer part of y.
+// Get y's even or odd bit.
+//
+(p6) cmp.ne.unc p8, p0 = GR_signif_y, r0
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Is the fractional part of the y = 0?
+// Is the integer even or odd.
+//
+(p10) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p12) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(POWL_64_XNEG) ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fmerge.ns FR_Sgn = FR_Sgn, FR_Sgn
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s0 p11, p0 = FR_Input_Y, FR_Half
+ nop.i 999 ;;
+}
+//
+// Raise possible denormal operand exception for both
+// X and Y.
+//
+{ .mfb
+ nop.m 999
+//
+// Branch for (x < 0) and Y not an integer.
+//
+(p0) fcmp.eq.unc.s0 p12, p0 = FR_Input_X, f1
+//
+// For x < 0 and y integer, make x positive
+// For x < 0 and y odd integer, set sign = -1.
+//
+(p11) br.cond.spnt L(POWL_64_SQRT) ;;
+}
+{ .mmf
+(p0) cmp.eq.unc p15, p14 = r0, r0
+ nop.m 999
+(p13) fnorm.s1 FR_Z = FR_Input_X ;;
+}
+{ .mfi
+ nop.m 999
+(p6) fnorm.s1 FR_Z = FR_Neg_X
+ nop.i 999
+}
+;;
+
+//
+// Branch to embedded sqrt(x)
+//
+//
+// Computes ln( x ) to extra precision
+// Input FR 1: FR_X
+// Output FR 2: FR_Y_hi
+// Output FR 3: FR_Y_lo
+// Output PR 1: PR_Safe
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Z_G_H_h1#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_BIAS = 0x000000000000FFFF ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 FR_W = FR_Z, f1
+ nop.i 999 ;;
+}
+//
+// Z = Norm(X) - both + and - case
+// Set Safe = True
+//
+{ .mmb
+(p0) getf.sig GR_signif_Z = FR_Z
+(p0) getf.exp GR_N = FR_Z
+ nop.b 999 ;;
+}
+{ .mii
+ nop.m 999
+//
+// Get significand of Z
+// W = Z - 1
+//
+(p0) extr.u GR_Index1 = GR_signif_Z, 59, 4 ;;
+//
+// Index1 = High order 4 bits of Z
+// X_0 = High order 15 bit of Z
+//
+(p0) shl GR_Index1 = GR_Index1,5 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Add offset to Index1 ptr.
+//
+(p0) fabs FR_abs_W = FR_W
+//
+// BIAS = 0x000...FFFF
+// Adjust Index1 ptr ( x 32) .
+//
+(p0) add GR_Index1 = GR_Index1,GR_Table_Ptr
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) ld2 GR_Z_1 =[GR_Index1],4
+(p0) extr.u GR_X_0 = GR_signif_Z, 49, 15
+}
+;;
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Z_G_H_h2#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p0) ldfs FR_G_1 = [GR_Index1],4 ;;
+(p0) ldfs FR_H_1 = [GR_Index1],8
+ nop.i 999 ;;
+}
+//
+// Adjust Index2 (x 32).
+//
+{ .mfi
+(p0) ldfe FR_h_1 = [GR_Index1],0
+ nop.f 999
+(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+//
+// load Z_1 from Index1
+// abs_W = |W|
+// Point to Table2
+//
+(p0) getf.exp GR_M = FR_abs_W
+//
+// M = M - BIAS
+// Load G_1
+// N = exponent of Z
+//
+ nop.i 999;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+ nop.i 999;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+ nop.i 999;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
+}
+{ .mii
+ nop.m 999
+//
+// Extract Index2
+// Load H_1
+// Is -8 > M ?
+//
+(p0) shl GR_Index2=GR_Index2,5 ;;
+(p0) add GR_Index2 = GR_Index2, GR_Table_Ptr
+}
+//
+// M = exponent of abs_W
+// X_1 = X_0 * Z_1
+//
+{ .mii
+(p0) sub GR_M = GR_M, GR_BIAS
+ nop.i 999 ;;
+(p0) cmp.gt.unc p7, p14 = -8, GR_M
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.spnt L(LOGL80_NEAR) ;;
+}
+//
+// Load h_1
+// Possible branch out.
+// Add offset of table to Index2
+//
+{ .mfi
+(p0) ld2 GR_Z_2 =[GR_Index2],4
+(p0) fmerge.se FR_S = f1,FR_Z
+(p0) sub GR_N = GR_N, GR_BIAS
+}
+;;
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_h3_G_H#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+//
+// load Z_2
+// N - BIAS
+// Point to Table 3.
+// S = merging of Z and 1.0
+//
+{ .mmi
+(p0) ldfs FR_G_2 = [GR_Index2],4
+(p0) setf.sig FR_float_N = GR_N
+(p0) add GR_Table_Ptr1 = 0x200,GR_Table_Ptr ;;
+}
+//
+// load G_2
+// X_2 = X_1 * Z_2
+// Add offset to Table 2 ptr.
+// float_N = significand of N
+//
+{ .mmi
+(p0) ldfs FR_H_2 = [GR_Index2],8 ;;
+//
+// load H_2
+// G = G * G_2
+//
+(p0) ldfe FR_h_2 = [GR_Index2],0
+(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+ nop.i 999;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+ nop.i 999;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+ nop.i 999;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
+}
+{ .mfi
+(p0) shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1
+ nop.f 999
+//
+// h = h_1 + h_2
+// Adjust Index3
+//
+(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr ;;
+}
+{ .mmb
+ nop.m 999
+(p0) ldfe FR_h_3 = [GR_Index3],12
+ nop.b 999 ;;
+}
+{ .mmf
+(p0) ldfs FR_H_3 = [GR_Table_Ptr1],0
+//
+// float_N = Make N a fp number
+// Load h_3
+// Get pointer to Q table.
+//
+(p0) ldfs FR_G_3 = [GR_Index3],0
+(p0) fmpy.s1 FR_G = FR_G_1, FR_G_2
+}
+;;
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Q#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+
+{ .mfi
+(p0) ldfe FR_log2_hi = [GR_Table_Ptr],16
+(p0) fadd.s1 FR_H = FR_H_1, FR_H_2
+ nop.i 999 ;;
+}
+{ .mmf
+ nop.m 999
+//
+// G = G_1 * G_2 * G_3
+//
+(p0) ldfe FR_log2_lo = [GR_Table_Ptr],16
+//
+// load h_2
+// H = H_1 + H_2
+// Get Index3
+//
+(p0) fadd.s1 FR_h = FR_h_1, FR_h_2 ;;
+}
+//
+// Load log2_lo part
+// r = G*S -1
+//
+{ .mfi
+(p0) ldfe FR_Q_6 = [GR_Table_Ptr],16
+//
+// Load H_3
+//
+(p0) fcvt.xf FR_float_N = FR_float_N
+ nop.i 999 ;;
+}
+//
+// Load Q_6
+//
+{ .mmi
+(p0) ldfe FR_Q_5 = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_Q_4 = [GR_Table_Ptr],16
+ nop.i 999 ;;
+}
+{ .mmi
+(p0) ldfe FR_Q_3 = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_Q_2 = [GR_Table_Ptr],16
+ nop.i 999 ;;
+}
+{ .mmf
+ nop.m 999
+//
+// poly_lo = Q_5 + r * Q_6
+// Load Q_2
+// rsq = r * r
+//
+(p0) ldfe FR_Q_1 = [GR_Table_Ptr],16
+//
+// h = h_1 + h_2 + h_3
+// H = H_1 + H_2 + H_3
+// Load G_3.
+// Begin Loading Q's - load log2_hi part
+//
+(p0) fmpy.s1 FR_G = FR_G, FR_G_3
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_H = FR_H, FR_H_3
+ nop.i 999
+}
+;;
+
+//
+// Y_lo = poly + Y_lo
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_h = FR_h, FR_h_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Load Q_5
+//
+(p0) fmpy.s1 FR_GS_hi = FR_G, FR_S
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fms.s1 FR_r = FR_G, FR_S, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// GS_hi = G*S
+// Load Q_4
+//
+(p0) fsub.s1 FR_r_cor = FR_GS_hi, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Load Q_3
+// r_cor = GS_hi -1
+// GS_lo = G*S - GS_hi
+//
+(p0) fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = poly_hi + rsq * poly_lo
+// Tbl = float_N*log2_hi + H
+//
+(p0) fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// r_cor = r_cor - r
+// poly_hi = r * Q_2 + Q_1
+//
+(p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Load Q_1
+//
+(p0) fsub.s1 FR_r_cor = FR_r_cor, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Y_lo = float_N*log2_lo + h
+//
+(p0) fadd.s1 FR_Y_hi = FR_G, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = Q_4 + r * poly_lo;;
+// r_cor = r_cor + GS_lo;;
+//
+(p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = Q_3 + r * poly_lo;;
+//
+(p0) fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 FR_Y_lo = FR_G, FR_Y_hi
+ nop.i 999
+}
+{ .mmi
+(p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_L_hi = [GR_Table_Ptr],16
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfe FR_L_lo = [GR_Table_Ptr],16
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Y_hi = Tbl + r
+// r_cor = r_cor + Y_lo
+//
+(p0) fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// Y_lo = Tbl - Y_hi
+// poly = rsq * poly + r_cor
+//
+(p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_r
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Y_lo = Y_lo + r
+//
+(p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
+//
+// Load L_Inv
+// Load L_hi
+// Load L_lo
+// all long before they are needed.
+// They are used in LOGL_RETURN PATH
+//
+br.cond.sptk L(LOGL_RETURN) ;;
+}
+L(LOGL80_NEAR):
+//
+// Branch LOGL80_NEAR
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_P#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_Wsq = FR_W, FR_W
+(p0) add GR_Table_Ptr1 = 0x50,GR_Table_Ptr
+}
+//
+// Adjust ptr to 1/2
+// Adjust Ptr1 to P_4
+//
+{ .mmi
+(p0) ldfe FR_Half = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_P_4 = [GR_Table_Ptr1],16
+ nop.i 999
+}
+//
+// Load 1/2
+//
+{ .mmi
+(p0) ldfe FR_P_8 = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_P_3 = [GR_Table_Ptr1],16
+ nop.i 999
+}
+{ .mmi
+(p0) ldfe FR_P_7 = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_P_2 = [GR_Table_Ptr1],16
+ nop.i 999
+}
+//
+// Load P_7
+// half_W = .5 * W
+// Load P_3
+//
+{ .mmi
+(p0) ldfe FR_P_6 = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_P_1 = [GR_Table_Ptr1],16
+ nop.i 999 ;;
+}
+//
+// Load P_6
+// Wsq = w * w
+// poly = w*P_4 + P_3
+// Load P_2
+//
+{ .mfi
+(p0) ldfe FR_P_5 = [GR_Table_Ptr],16
+//
+// Load P_5
+// poly_lo = w * P_8 + P_7
+// Y_hi = w - (1/2)w*w
+// Load P_1
+//
+(p0) fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_W3 = FR_Wsq, FR_W
+ nop.i 999
+}
+;;
+
+//
+// Y_lo = W3 * poly + Y_lo
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;;
+(p0) ldfe FR_L_hi = [GR_Table_Ptr],16
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfe FR_L_lo = [GR_Table_Ptr],16
+//
+// Load P_8
+// Load P_4
+//
+(p0) fmpy.s1 FR_half_W = FR_Half, FR_W
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// W4 = Wsq * Wsq
+// poly = w *poly + P_2
+//
+(p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = w * poly + P_1
+// w3 = wsq * w
+//
+(p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = w * poly_lo + P_6
+// Y_lo = W - Y_hi
+//
+(p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = w * poly_lo + P_5
+// Y_lo = Y_lo - w * (1/2)w
+//
+(p0) fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Y_lo = (W-Y_hi) - w * (1/2)w
+// poly = W4* poly_lo + poly
+//
+(p0) fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
+ nop.i 999 ;;
+}
+L(LOGL_RETURN):
+{ .mfi
+(p0) add GR_Expo_Range = 0x2,r0
+//
+// Load L_Inv
+// Load L_hi
+// Load L_lo
+// all long before they are needed.
+//
+//
+// kernel_log_80 computed ln(X)
+// and return logX_hi and logX_lo as results.
+// PR_pow_Safe set as well.
+//
+(p0) fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo
+//
+// Compute Y * (logX_hi + logX_lo)
+// P_hi -> X
+// P_lo -> X_cor
+// (Manipulate names so that inputs are in
+// the place kernel_exp expects them)
+// Set GR_Flag to 2
+// Set GR_Expo_Range to Double
+//
+// This function computes exp( x + x_cor)
+// Input FR 1: FR_X
+// Input FR 2: FR_X_cor
+// Input GR 1: GR_Flag
+// Input GR 2: GR_Expo_Range
+// Output FR 3: FR_Y_hi
+// Output FR 4: FR_Y_lo
+// Output FR 5: FR_Scale
+// Output PR 1: PR_Safe
+//
+(p0) cmp.eq.unc p15, p0 = r0, r0
+}
+;;
+
+{ .mmi
+(p0) addl GR_W1_ptr = @ltoff(Constants_exp_64_W1#), gp
+(p0) addl GR_W2_ptr = @ltoff(Constants_exp_64_W2#), gp
+(p0) add GR_Flag = 0x2,r0
+}
+;;
+
+{ .mmi
+ ld8 GR_W1_ptr = [GR_W1_ptr]
+ ld8 GR_W2_ptr = [GR_W2_ptr]
+(p0) cmp.ne.unc p7, p0 = 0x1, GR_Flag
+}
+;;
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_Mask = 0x1FFFF ;;
+}
+
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_BIAS = 0x0FFFF ;;
+}
+{ .mfi
+ nop.m 999
+//
+// X_lo = Y * logX_lo
+//
+(p0) fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Set Safe=True
+// Flag is always 2 for this routine
+//
+(p0) fmpy.s1 FR_float_N = FR_X, FR_L_Inv
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// X_hi = Y * logX_hi + X_lo
+// Set GR_Flag = 2 for exp(x + xcor)
+//
+(p0) fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi
+ nop.i 999 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) getf.exp GR_Expo_X = FR_X
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) and GR_Expo_X = GR_Expo_X, GR_Mask
+//
+// Calculate unBIASed exponent of X
+// Point to Table of W1s
+// Point to Table of W2s
+//
+(p0) fcvt.fx.s1 FR_N = FR_float_N
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
+//
+// Float_N = X * L_Inv
+// Create exponent BIAS
+// Get BIASed exponent of X
+//
+(p0) sub GR_Expo_X = GR_Expo_X, GR_BIAS ;;
+}
+{ .mib
+(p0) cmp.gt.unc p9, p0 = -6, GR_Expo_X
+ nop.i 999
+//
+// N = fcvt.fx(float_N)
+// If -6 > Expo_X, set P9
+//
+(p9) br.cond.spnt L(EXPL_SMALL)
+}
+;;
+
+//
+// If expo_X < -6 goto exp_small
+//
+{ .mmi
+ nop.m 999
+(p0) addl GR_T1_ptr = @ltoff(Constants_exp_64_T1#), gp
+(p0) cmp.lt.unc p10, p0 = 14, GR_Expo_X
+}
+;;
+
+{ .mmi
+ ld8 GR_T1_ptr = [GR_T1_ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// If 14 < Expo_X, set P10
+// Create pointer to T1 table
+//
+(p10) br.cond.spnt L(EXPL_HUGE) ;;
+}
+
+
+{ .mmi
+(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Exponents#), gp
+(p0) addl GR_T2_ptr = @ltoff(Constants_exp_64_T2#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ ld8 GR_T2_ptr = [GR_T2_ptr]
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p0) shladd GR_Table_Ptr = GR_Expo_Range,4,GR_Table_Ptr ;;
+//
+// Adjust T1_ptr by x 4 for single-precision values
+// Adjust T2_ptr by x 4 for single-precision values
+//
+(p0) ld8 GR_Big_Pos_Exp = [GR_Table_Ptr],8
+ nop.i 999 ;;
+}
+//
+// Load double W1
+// Load +max exponent
+//
+{ .mfi
+(p0) ld8 GR_Big_Neg_Exp = [GR_Table_Ptr],0
+//
+// If 14 < Expo_X, goto exp_huge
+//
+(p0) fcvt.xf FR_float_N = FR_N
+ nop.i 999
+}
+;;
+
+//
+// Load double W2
+// Load -max exponent
+// Load ptr to A's
+//
+
+{ .mmi
+(p0) getf.sig GR_N_fix = FR_N
+(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_A#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+//
+// Load single T1
+// Load single T2
+// W_1_p1 = W_1 + 1
+//
+{ .mmi
+(p0) ldfe FR_A_3 = [GR_Table_Ptr],16 ;;
+//
+// Load A_3
+// if k > big_pos_exp, set p14 and Safe=False
+//
+(p0) ldfe FR_A_2 = [GR_Table_Ptr],16
+(p0) extr.u GR_M1 = GR_N_fix, 6, 6
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr
+//
+// float_N = fcvt.xf(N)
+// N_fix = significand of N
+// Create pointer to T2 table
+//
+(p0) extr.u GR_M2 = GR_N_fix, 0, 6
+}
+//
+// r = r + X_cor
+// Adjust W1_ptr by x 8 for double-precision values
+// Adjust W2_ptr by x 8 for double-precision values
+// Adjust Table_ptr by Expo_Rangex16
+//
+{ .mmi
+(p0) shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr ;;
+(p0) ldfd FR_W1 = [GR_W1_ptr],0
+(p0) shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr
+}
+//
+// Load ptr to A's
+//
+{ .mfi
+(p0) ldfs FR_T1 = [GR_T1_ptr],0
+(p0) fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X
+(p0) shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr ;;
+}
+{ .mmi
+(p0) ldfd FR_W2 = [GR_W2_ptr],0
+(p0) ldfs FR_T2 = [GR_T2_ptr],0
+//
+// r = x - L_hi * float_N
+// M2 = extr.u(N_fix,0,6)
+// M1 = extr.u(N_fix,6,6)
+//
+(p0) extr GR_k = GR_N_fix, 12, 52 ;;
+}
+//
+// Load A_1
+// poly = A_3 * r + A_2
+// rsq = r*r
+//
+{ .mii
+(p0) add GR_BIAS_p_k = GR_BIAS, GR_k
+(p0) cmp.gt.unc p14,p15 = GR_k,GR_Big_Pos_Exp ;;
+(p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+}
+//
+// BIAS_p_K = BIAS + k
+// T = T1 * T2
+//
+{ .mfi
+(p0) setf.exp FR_Scale = GR_BIAS_p_k
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
+ nop.i 999
+}
+//
+// W = W_1_p1 * W2 + W1
+//
+{ .mfi
+(p0) ldfe FR_A_1 = [GR_Table_Ptr],16
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_W_1_p1 = FR_W1, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// k = extr(N_fix,12,52)
+// r = r - N * L_lo
+// Load ptr to Table of exponent thresholds.
+//
+(p0) fadd.s1 FR_r = FR_r, FR_X_cor
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_T = FR_T1, FR_T2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if k < big_neg_exp, set p14 and Safe=False
+// Load A_2
+//
+(p0) fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) mov FR_Y_hi = FR_T
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Scale = set_exp(BIAS_p_k)
+// poly = r * poly + A_1
+//
+(p0) fadd.s1 FR_Wp1 = FR_W, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Wp1 = W + 1
+// poly = rsq * poly + rk
+//
+(p0) fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Y_lo = poly * Wp1 + W
+// Y_hi = T
+//
+(p0) fmpy.s1 FR_Y_lo = FR_Y_lo, FR_T
+//
+// Y_lo = T * Y_lo
+//
+(p0) br.cond.sptk L(EXPL_RETURN) ;;
+}
+
+L(EXPL_SMALL):
+
+//
+// r4 = rsq * rsq
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr1 = @ltoff(Constants_exp_64_P), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr1 = [GR_Table_Ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mmf
+ nop.m 999
+(p0) ldfe FR_P_6 = [GR_Table_Ptr1],16
+//
+// Return
+//
+(p0) fadd.s1 FR_r = FR_X,f0 ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Exponents#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Ptr = [GR_Table_Ptr]
+(p0) ldfe FR_P_5 = [GR_Table_Ptr1],16
+ nop.i 999
+}
+;;
+
+//
+// Is input very small?
+// Load P_5
+//
+{ .mii
+(p0) ldfe FR_P_4 = [GR_Table_Ptr1],16
+(p0) add GR_Table_Ptr = 0x040,GR_Table_Ptr ;;
+(p0) shladd GR_Table_Ptr = GR_Expo_Range,3,GR_Table_Ptr ;;
+}
+{ .mmb
+(p0) ldfe FR_P_3 = [GR_Table_Ptr1],16
+//
+// Adjust ptr.
+//
+(p0) ld8 GR_vsm_expo = [GR_Table_Ptr],0
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// r = X (don't seem to need X_Cor)
+// Load the threshold exponents
+//
+(p0) fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999 ;;
+}
+//
+// Load the negative integer
+// Load P_5
+//
+{ .mfi
+(p0) cmp.lt.unc p12, p0 = GR_Expo_X, GR_vsm_expo
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// rsq = r * r
+// Offset into exponents
+//
+(p0) fmpy.s1 FR_r4 = FR_rsq, FR_rsq
+(p12) br.cond.spnt L(EXPL_VERY_SMALL) ;;
+}
+{ .mfi
+(p0) ldfe FR_P_2 = [GR_Table_Ptr1],16
+//
+// Load p4,p3,p2,p1
+//
+(p0) fma.s1 FR_poly_lo = FR_P_6, FR_r, FR_P_5
+//
+// Y_lo = r4 * poly_lo + poly_hi
+// Scale = 1.0
+//
+(p0) add GR_temp = 0x1,r0 ;;
+}
+{ .mmf
+ nop.m 999
+(p0) ldfe FR_P_1 = [GR_Table_Ptr1],0
+(p0) mov FR_Scale = f1
+}
+//
+// Begin creating lsb to perturb final result
+//
+{ .mfi
+(p0) setf.sig FR_temp = GR_temp
+(p0) mov FR_Y_hi = f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = p_5 + p_6 * r
+// poly_hi = p_1 + p_2 * r
+//
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = p_4 + poly_lo * r
+// poly_hi = r + poly_hi * rsq
+//
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_hi = FR_P_2, FR_r, FR_P_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = p_3 + poly_lo * r
+// Y_hi = 1, always
+//
+(p0) fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Set lsb in fp register
+//
+(p0) for FR_temp = FR_Y_lo,FR_temp
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Toggle on last bit of Y_lo
+//
+(p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_temp
+//
+// Set lsb of Y_lo to 1
+//
+(p0) br.cond.sptk L(EXPL_RETURN) ;;
+}
+L(EXPL_VERY_SMALL):
+{ .mfi
+ nop.m 999
+(p0) mov FR_Y_lo = FR_r
+(p0) cmp.eq.unc p15, p0 = r0, r0
+}
+{ .mfi
+ nop.m 999
+(p0) mov FR_Scale = f1
+ nop.i 999
+};;
+{ .mfb
+ nop.m 999
+(p0) mov FR_Y_hi = f1
+//
+// If flag_not_1,
+// Y_hi = 1.0
+// Y_lo = X + X_cor
+// PR_Safe = true
+//
+(p0) br.cond.sptk L(EXPL_RETURN) ;;
+}
+L(EXPL_HUGE):
+{ .mfi
+ nop.m 999
+//
+// Return for flag=2
+//
+(p0) fcmp.gt.unc.s1 p12, p13 = FR_X, f0
+(p0) cmp.eq.unc p14, p15 = r0, r0 ;;
+}
+{ .mlx
+ nop.m 999
+//
+// Set Safe to false
+// Is x > 0
+//
+(p12) movl GR_Mask = 0x15DC0 ;;
+}
+{ .mlx
+(p12) setf.exp FR_Y_hi = GR_Mask
+(p13) movl GR_Mask = 0xA240 ;;
+}
+{ .mlx
+(p13) setf.exp FR_Y_hi = GR_Mask
+//
+// x > 0: Create mask for Y_hi = 2**(24,000)
+// x <= 0: Create mask for Y_hi = 2**(-24,000)
+//
+(p13) movl GR_temp = 0xA1DC ;;
+}
+{ .mfi
+(p13) setf.exp FR_Y_lo = GR_temp
+//
+// x <= 0: Create mask for 2**(-24,100)
+// x <= 0: Y_lo = 2**(-24,100)
+//
+(p12) mov FR_Y_lo = f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) mov FR_Scale = FR_Y_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// x > 0: Y_lo = 1.0
+// x > 0: Scale = 2**(24,000)
+//
+(p13) mov FR_Scale = FR_Y_hi
+ nop.i 999 ;;
+}
+L(EXPL_RETURN):
+{ .mfi
+ nop.m 999
+//
+// Scale = 2**(24,000)
+//
+//
+// exp(y *ln(x)) almost complete
+// FR_Scale is Scale
+// f34 is Z_hi
+// f35 is Z_lo
+//
+(p0) fmpy.s1 FR_Sgn = FR_Scale, FR_Sgn
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// sgn * scale
+//
+(p0) fmpy.s1 FR_Y_lo = FR_Y_lo,FR_Sgn
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Z_lo * (sgn * scale)
+//
+(p0) fma.s0 FR_Result = FR_Y_hi, FR_Sgn, FR_Y_lo
+//
+// Z_hi * (sgn * scale) + Z_lo
+//
+(p15) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x01
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+//
+// Z_hi * (sgn * scale) + Z_lo with wre & td
+// Z_hi * (sgn * scale) + Z_lo with fz & td
+//
+(p0) movl GR_T1_ptr = 0x00000000013FFF ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s3 FR_Result_small = FR_Y_hi, FR_Sgn, FR_Y_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x40
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Return if no danger of overflow or underflow.
+//
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+{ .mfi
+ nop.m 999
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+(p0) fma.s2 FR_Result_big = FR_Y_hi, FR_Sgn, FR_Y_lo
+ nop.i 999 ;;
+}
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+//
+// If (Safe) is true, then
+// Compute result using user supplied status field.
+// No overflow or underflow here, but perhaps inexact.
+// Return
+// Else
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE single, double,
+// double extended
+//
+{ .mfi
+(p0) setf.exp FR_Big = GR_T1_ptr
+(p0) fsetc.s2 0x7F,0x40
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = FR_Result_small, 0x00F
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns FR_NBig = FR_Big, FR_Big
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Create largest double exponent + 1.
+// Create smallest double exponent - 1.
+// Identify denormals
+//
+(p0) fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// fcmp: resultS2 <= - overflow threshold
+// fclass: resultS3 is denorm/unorm/0
+//
+(p8) mov GR_Parameter_TAG = 18 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// fcmp: resultS2 >= + overflow threshold
+//
+(p0) fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
+(p8) br.cond.spnt __libm_error_region ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov GR_Parameter_TAG = 18
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt __libm_error_region ;;
+}
+//
+// Report that pow overflowed - either +Inf, or -Inf
+//
+{ .mmb
+(p11) mov GR_Parameter_TAG = 19
+ nop.m 999
+(p11) br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Report that pow underflowed
+//
+(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+
+
+L(POWL_64_SQUARE):
+// Here if x not zero and y=2.
+// Must call __libm_error_support for overflow or underflow
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fma.s0 FR_Result = FR_Input_X, FR_Input_X, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x01
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_T1_ptr = 0x00000000013FFF ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s3 FR_Result_small = FR_Input_X, FR_Input_X, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x40
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Return if no danger of overflow or underflow.
+//
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s2 FR_Result_big = FR_Input_X, FR_Input_X, f0
+ nop.i 999 ;;
+}
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+//
+// If (Safe) is true, then
+// Compute result using user supplied status field.
+// No overflow or underflow here, but perhaps inexact.
+// Return
+// Else
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE single, double,
+// double extended
+//
+{ .mfi
+(p0) setf.exp FR_Big = GR_T1_ptr
+(p0) fsetc.s2 0x7F,0x40
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = FR_Result_small, 0x00F
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns FR_NBig = FR_Big, FR_Big
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Create largest double exponent + 1.
+// Create smallest double exponent - 1.
+// Identify denormals
+//
+(p0) fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// fcmp: resultS2 <= - overflow threshold
+// fclass: resultS3 is denorm/unorm/0
+//
+(p8) mov GR_Parameter_TAG = 18 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// fcmp: resultS2 >= + overflow threshold
+//
+(p0) fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
+(p8) br.cond.spnt __libm_error_region ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov GR_Parameter_TAG = 18
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt __libm_error_region ;;
+}
+//
+// Report that pow overflowed - either +Inf, or -Inf
+//
+{ .mmb
+(p11) mov GR_Parameter_TAG = 19
+ nop.m 999
+(p11) br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Report that pow underflowed
+//
+(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+
+
+
+
+L(POWL_64_SPECIAL):
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.s1 p15, p0 = FR_Input_X, f1 // Is x=+1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p14, p0 = FR_Input_Y, 0x023
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fmpy.s0 FR_Result = f1,f1 // If x=1, result=1
+(p15) br.cond.spnt L(POWL_64_RETURN) ;; // Exit if x=1
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p13, p0 = FR_Input_X, 0x023
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8, p0 = FR_Input_X, 0x143
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9, p0 = FR_Input_Y, 0x143
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p10, p0 = FR_Input_X, 0x083
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = FR_Input_Y, 0x083
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = FR_Input_Y, 0x007
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p7, p0 = FR_Input_Y, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// set p13 if x +/- Inf
+// set p14 if y +/- Inf
+// set p8 if x Natval or +/-SNaN
+// set p9 if y Natval or +/-SNaN
+// set p10 if x QNaN
+// set p11 if y QNaNs
+// set p6 if y is +/-0
+// set p7 if y is 1
+//
+(p8) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X
+(p6) cmp.ne p8,p0 = r0,r0 ;; // Don't exit if x=snan, y=0 ==> result=+1
+}
+{ .mfb
+ nop.m 999
+(p9) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X
+(p8) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mfb
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f0
+(p9) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Produce result for SNaN and NatVals and return
+//
+(p6) fclass.m.unc p15, p0 = FR_Input_X,0x007
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// If Y +/- 0, set p15 if x +/- 0
+//
+(p6) fclass.m.unc p8, p0 = FR_Input_X,0x0C3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fadd.s0 FR_Result = f1, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Set p8 if y = +/-0 and X is a QNaN/SNaN
+// If y = +/-0, let result = 1.0
+//
+(p7) fmpy.s0 FR_Result = FR_Input_X,f1
+//
+// If y == 1, result = x * 1
+//
+(p15) mov GR_Parameter_TAG = 20
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p15) br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+ nop.m 999
+//
+// If x and y are both zero, result = 1.0 and call error
+// support.
+//
+(p8) mov GR_Parameter_TAG = 23
+(p8) br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// If y = +/-0 and x is a QNaN, result = 1.0 and call error
+// support.
+//
+(p6) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+
+// If x=0, y=-inf, go to the X_IS_ZERO path
+{ .mfb
+ nop.m 999
+(p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0
+(p7) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Produce all results for x**0 and x**1
+// Let all the result x ** 0 == 1 and return
+// Let all x ** 1 == x and return
+//
+(p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
+(p10) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p11) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Return result for x or y QNaN input with QNaN result
+//
+(p14) br.cond.spnt L(POWL_64_Y_IS_INF) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.spnt L(POWL_64_X_IS_INF) ;;
+}
+L(POWL_64_X_IS_ZERO):
+{ .mmb
+(p0) getf.sig GR_signif_y = FR_Input_Y
+(p0) getf.exp GR_BIASed_exp_y = FR_Input_Y
+ nop.b 999 ;;
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Mask = 0x1FFFF
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_y_sign = 0x20000 ;;
+}
+//
+// Get BIASed exp and significand of y
+//
+{ .mfi
+(p0) and GR_exp_y = GR_Mask,GR_BIASed_exp_y
+ nop.f 999
+(p0) and GR_y_sign = GR_y_sign,GR_BIASed_exp_y
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_BIAS = 0xFFFF ;;
+}
+{ .mfi
+(p0) cmp.lt.unc p9, p8 = GR_exp_y,GR_BIAS
+ nop.f 999
+//
+// Maybe y is < 1 already, so
+// can never be an integer.
+// Remove sign bit from exponent.
+//
+(p0) sub GR_exp_y = GR_exp_y,GR_BIAS ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Remove exponent BIAS
+//
+(p8) shl GR_exp_y= GR_signif_y,GR_exp_y ;;
+}
+{ .mfi
+(p9) or GR_exp_y= 0xF,GR_signif_y
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+//
+// Shift significand of y looking for nonzero bits
+// For y > 1, shift signif_y exp_y bits to the left
+// For y < 1, turn on 4 low order bits of significand of y
+// so that the fraction will always be non-zero
+//
+(p0) shl GR_signif_y= GR_exp_y,1 ;;
+(p0) extr.u GR_low_order_bit = GR_exp_y,63,1
+}
+//
+// Integer part of y shifted off.
+// Get y's low even or odd bit - y might not be an int.
+//
+{ .mii
+(p0) cmp.eq.unc p13,p0 = GR_signif_y, r0
+(p0) cmp.eq.unc p8,p9 = GR_y_sign, r0 ;;
+//
+// Is y an int?
+// Is y positive
+//
+(p13) cmp.ne.unc p13,p0 = GR_low_order_bit, r0 ;;
+}
+//
+// Is y and int and odd?
+//
+{ .mfb
+(p13) cmp.eq.unc p13,p14 = GR_y_sign, r0
+(p8) fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
+ nop.b 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Is y and int and odd and positive?
+//
+(p13) mov FR_Result = FR_Input_X
+(p13) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Return +/-0 when x=+/-0 and y is and odd pos. int
+//
+(p14) frcpa.s0 FR_Result, p10 = f1, FR_Input_X
+(p14) mov GR_Parameter_TAG = 21
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p14) br.cond.spnt __libm_error_region ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Return +/-Inf when x=+/-0 and y is an odd neg int
+// and raise dz exception
+//
+(p8) mov FR_Result = f0
+(p8) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Return +0 when x=+/-0 and y > 0 and not odd.
+//
+(p9) frcpa.s0 FR_Result, p10 = f1,f0
+(p9) mov GR_Parameter_TAG = 21
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.sptk __libm_error_region ;;
+}
+L(POWL_64_X_IS_INF):
+{ .mfi
+(p0) getf.exp GR_exp_y = FR_Input_Y
+(p0) fclass.m.unc p13, p0 = FR_Input_X,0x022
+(p0) mov GR_Mask = 0x1FFFF ;;
+}
+
+{ .mfi
+(p0) getf.sig GR_signif_y = FR_Input_Y
+(p0) fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Flag if y denormal
+ nop.i 999 ;;
+}
+
+//
+// Get exp and significand of y
+// Create exponent mask and sign mask
+//
+{ .mlx
+(p0) and GR_low_order_bit = GR_Mask,GR_exp_y
+(p0) movl GR_BIAS = 0xFFFF
+}
+{ .mmi
+ nop.m 999 ;;
+//
+// Remove sign bit from exponent.
+//
+(p0) cmp.lt.unc p9, p8 = GR_low_order_bit,GR_BIAS
+//
+// Maybe y is < 1 already, so
+// isn't an int.
+//
+(p0) sub GR_low_order_bit = GR_low_order_bit,GR_BIAS
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_sign_mask = 0x20000 ;;
+}
+{ .mfi
+(p0) and GR_sign_mask = GR_sign_mask,GR_exp_y
+//
+// Return +Inf when x=+/-0 and y < 0 and not odd and raise
+// divide-by-zero exception.
+//
+(p0) fclass.m.unc p11, p0 = FR_Input_X,0x021
+ nop.i 999 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+//
+// Is shift off integer part of y.
+// Get y's even or odd bit - y might not be an int.
+//
+(p11) cmp.eq.unc p11,p12 = GR_sign_mask, r0
+//
+// Remove exponent BIAS
+//
+(p8) shl GR_exp_y = GR_signif_y,GR_low_order_bit ;;
+}
+{ .mfi
+(p9) or GR_exp_y = 0xF,GR_signif_y
+//
+// Is y positive or negative when x is +Inf?
+// Is y and int when x = -Inf
+//
+(p11) mov FR_Result = FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) mov FR_Result = f0
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+//
+// Shift signficand looking for nonzero bits
+// For y non-ints, upset the significand.
+//
+(p0) shl GR_signif_y = GR_exp_y,1 ;;
+(p13) cmp.eq.unc p13,p0 = GR_signif_y, r0
+}
+{ .mii
+ nop.m 999
+(p0) extr.u GR_low_order_bit = GR_exp_y,63,1 ;;
+(p13) cmp.ne.unc p13,p0 = GR_low_order_bit, r0
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p11) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p12) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+//
+// Return Inf for y > 0
+// Return +0 for y < 0
+// Is y even or odd?
+//
+{ .mii
+(p13) cmp.eq.unc p13,p10 = GR_sign_mask, r0
+(p0) cmp.eq.unc p8,p9 = GR_sign_mask, r0 ;;
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// For x = -inf, y is and int, positive
+// and odd
+// Is y positive in general?
+//
+(p13) mov FR_Result = FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p10) fmerge.ns FR_Result = f0, f0
+(p13) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Return -Inf for x = -inf and y > 0 and odd int.
+// Return -0 for x = -inf and y < 0 and odd int.
+//
+(p8) fmerge.ns FR_Result = FR_Input_X, FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p9) mov FR_Result = f0
+(p8) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+L(POWL_64_Y_IS_INF):
+{ .mfi
+ nop.m 999
+//
+// Return Inf for x = -inf and y > 0 not an odd int.
+// Return +0 for x = -inf and y < 0 and not an odd int.
+//
+(p0) fclass.m.unc p8, p0 = FR_Input_Y, 0x021
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9, p0 = FR_Input_Y, 0x022
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fabs FR_X = FR_Input_X
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Find y = +/- Inf
+// Compute |x|
+//
+(p8) fcmp.lt.unc.s1 p6, p0 = FR_X, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.unc.s1 p7, p0 = FR_X, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fcmp.lt.unc.s1 p12, p0 = FR_X, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fcmp.gt.unc.s1 p13, p0 = FR_X, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For y = +Inf and |x| < 1 returns 0
+// For y = +Inf and |x| > 1 returns Inf
+// For y = -Inf and |x| < 1 returns Inf
+// For y = -Inf and |x| > 1 returns 0
+//
+(p6) mov FR_Result = f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) mov FR_Result = FR_Input_Y
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_Y
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p13) mov FR_Result = f0
+//
+// Produce x ** +/- Inf results
+//
+(p6) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p12) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.spnt L(POWL_64_RETURN) ;;
+}
+{ .mfb
+ nop.m 999
+//
+// +/-1 ** +/-Inf, result is +1
+//
+(p0) fmpy.s0 FR_Result = f1,f1
+(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+L(POWL_64_UNSUPPORT):
+{ .mfb
+ nop.m 999
+//
+// Return NaN and raise invalid
+//
+(p0) fmpy.s0 FR_Result = FR_Input_X,f0
+//
+// Raise exceptions for specific
+// values - pseudo NaN and
+// infinities.
+//
+(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+L(POWL_64_XNEG):
+{ .mfi
+ nop.m 999
+(p0) frcpa.s0 FR_Result, p8 = f0, f0
+//
+// Raise invalid for x < 0 and
+// y not an integer and
+//
+(p0) mov GR_Parameter_TAG = 22
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p0) br.cond.sptk __libm_error_region ;;
+}
+L(POWL_64_SQRT):
+{ .mfi
+ nop.m 999
+(p0) frsqrta.s0 FR_Result,p10 = FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f62=FR_Half,FR_Input_X,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (2)
+// h = 1/2 * a in f9
+//
+(p10) fma.s1 f63=FR_Result,FR_Result,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (3)
+// t1 = y0 * y0 in f10
+//
+(p10) fnma.s1 f32=f63,f62,f11
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (4)
+// t2 = 1/2 - t1 * h in f10
+//
+(p10) fma.s1 f33=f32,FR_Result,FR_Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (5)
+// y1 = y0 + t2 * y0 in f13
+//
+(p10) fma.s1 f34=f33,f62,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (6)
+// t3 = y1 * h in f10
+//
+(p10) fnma.s1 f35=f34,f33,f11
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (7)
+// t4 = 1/2 - t3 * y1 in f10
+//
+(p10) fma.s1 f63=f35,f33,f33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (8)
+// y2 = y1 + t4 * y1 in f13
+//
+(p10) fma.s1 f32=FR_Input_X,f63,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Step (9)
+// S = a * y2 in f10
+//
+(p10) fma.s1 FR_Result=f63,f62,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (10)
+// t5 = y2 * h in f9
+//
+(p10) fma.s1 f33=f11,f63,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (11)
+// H = 1/2 * y2 in f11
+//
+(p10) fnma.s1 f34=f32,f32,f8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Step (12)
+// d = a - S * S in f12
+//
+(p10) fnma.s1 f35=FR_Result,f63,f11
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (13)
+// t6 = 1/2 - t5 * y2 in f7
+//
+(p10) fma.s1 f62=f33,f34,f32
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Step (14)
+// S1 = S + d * H in f13
+//
+(p10) fma.s1 f63=f33,f35,f33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Step (15)
+// H1 = H + t6 * h in f7
+//
+(p10) fnma.s1 f32=f62,f62,FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Step (16)
+// d1 = a - S1 * S1
+//
+(p10) fma.s0 FR_Result=f32,f63,f62
+//
+// Step (17)
+// R = S1 + d1 * H1
+//
+(p10) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Do the Newton-Raphson iteration from the EAS.
+//
+(p0) br.cond.sptk L(POWL_64_RETURN) ;;
+}
+//
+// Take care of the degenerate cases.
+//
+
+L(POWL_64_RETURN):
+{ .mfb
+ nop.m 999
+(p0) mov FR_Output = FR_Result
+(p0) br.ret.sptk b0 ;;
+}
+.endp powl
+ASM_SIZE_DIRECTIVE(powl)
+
+.proc __libm_error_region // Common error exit: spills x, y and the result to the stack and calls __libm_error_support
+__libm_error_region: // Entered by branch (not call) with GR_Parameter_TAG already set by the branching code
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32 once the frame below exists
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store y (extended format) at sp+32; post-increment leaves the pointer at sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0 (still the return address of the math function)
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 (x) on stack at sp+16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48, the post-incremented pointer)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) on stack at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point Parameter 2 back at the y slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Reload the result slot (sp+48) into f8; presumably updated by the handler
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return through the b0 saved on entry
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_rem_pio2.c b/sysdeps/ia64/fpu/e_rem_pio2.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_rem_pio2.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_rem_pio2f.c b/sysdeps/ia64/fpu/e_rem_pio2f.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_rem_pio2f.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S
new file mode 100644
index 0000000..c8aca17
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_remainder.S
@@ -0,0 +1,592 @@
+ .file "remainder.asm"
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, Bob Norin,
+// Shane Story, and Ping Tak Peter Tang of the Computational Software Lab,
+// Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//====================================================================
+// 2/02/00 Initial version
+// 3/02/00 New Algorithm
+// 4/04/00 Unwind support added
+// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//11/29/00 Set FR_Y to f9
+//
+// API
+//====================================================================
+// double remainder(double,double);
+//
+// Overview of operation
+//====================================================================
+// remainder(a,b)=a-i*b,
+// where i is an integer such that, if b!=0 and a is finite,
+// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
+//
+// Algorithm
+//====================================================================
+// a). eliminate special cases
+// b). if |a/b|<0.25 (first quotient estimate), return a
+// c). use single precision divide algorithm to get quotient q
+// rounded to 24 bits of precision
+// d). calculate partial remainders (using both q and q-ulp);
+// select one and RZ(a/b) based on the sign of |a|-|b|*q
+// e). if the exponent difference (exponent(a)-exponent(b))
+// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
+// and sticky bits to round to integer; exit loop and
+// calculate final remainder
+// f). if exponent(a)-exponent(b)>=24, select new value of a as
+// the partial remainder calculated using RZ(a/b);
+// repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+
+#include "libm_support.h"
+
+// Registers used
+//====================================================================
+// Predicate registers: p6-p14
+// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15,f32
+
+ .section .text
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+
+ .proc remainder#
+ .align 32
+ .global remainder#
+ .align 32
+
+remainder:
+#ifdef _LIBC
+.global __remainder
+.type __remainder,@function
+__remainder:
+#endif
+// IEEE remainder(a,b): inputs a in f8, b in f9
+// result in f8; NaN/Inf/zero operands branch to the special-case code after the main loop
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0 // 1 input (r32), 4 locals (r33-r36), 4 outputs (r37-r40)
+ // f13=|a|
+ fmerge.s f13=f0,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // f14=|b|
+ fmerge.s f14=f0,f9
+ nop.i 0;;
+}
+ {.mlx
+ mov r28=0x2ffdd // sign bit + biased exponent of -2^{-34}; becomes E in f32 below
+ // r3=2^{23} (as a single precision bit pattern)
+ movl r3=0x4b000000;;
+}
+
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ setf.exp f32=r28
+(p0) fclass.m.unc p11,p0 = f9, 0xe7
+ nop.i 999
+}
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999;;
+}
+
+{.mfi
+ nop.m 0
+ mov f12=f0
+ nop.i 0
+}
+{ .mfi
+ // set p7=1 (p7 marks the first pass; it is cleared by the q<1/4 test below)
+ cmp.eq.unc p7,p0=r0,r0
+ // Step (1)
+ // y0 = approximation of 1 / |b| in f10
+ frcpa.s1 f10,p6=f13,f14
+ nop.i 0;;
+}
+
+{.bbb
+ (p9) br.cond.spnt L(FREM_X_NAN_INF)
+ (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
+ nop.b 0
+} {.mfi
+ nop.m 0
+ // set D flag if a (f8) is denormal
+ fnma.s0 f6=f8,f1,f8
+ nop.i 0;;
+}
+
+
+L(remloop24):
+ { .mfi
+ nop.m 0
+ // Step (2)
+ // q0 = a * y0 in f12
+ (p6) fma.s1 f12=f13,f10,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // e0 = 1 - b * y0 in f7
+ (p6) fnma.s1 f7=f14,f10,f1
+ nop.i 0;;
+} {.mlx
+ nop.m 0
+ // r2=1.25*2^{-24}
+ movl r2=0x33a00000;;
+}
+
+{.mfi
+ nop.m 0
+ // q1=q0*(1+e0)
+ fma.s1 f15=f12,f7,f12
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (4)
+ // e1 = e0 * e0 + E in f7
+ (p6) fma.s1 f7=f7,f7,f32
+ nop.i 0;;
+}
+ {.mii
+ (p7) getf.exp r29=f12 // first pass only: exponent field of q0
+ (p7) mov r28=0xfffd // first pass only: bias-2, the exponent of 1/4
+ nop.i 0;;
+}
+ { .mfi
+ // f12=2^{23}
+ setf.s f12=r3
+ // Step (5)
+ // q2 = q1 + e1 * q1 in f11 (rounded to single precision)
+ (p6) fma.s.s1 f11=f7,f15,f15
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // q2 = q1 + e1 * q1 in f6 (same product without the single precision completer)
+ (p6) fma.s1 f6=f7,f15,f15
+ nop.i 0;;
+}
+
+ {.mmi
+ // f15=1.25*2^{-24}
+ setf.s f15=r2
+ // q<1/4 ? (i.e. expon< -2)
+ (p7) cmp.gt p7,p0=r28,r29
+ nop.i 0;;
+}
+
+{.mfb
+ // r29= -32+bias
+ mov r29=0xffdf
+ // if |a/b|<1/4, set D flag before returning
+ (p7) fma.d.s0 f9=f9,f0,f8
+ nop.b 0;;
+}
+ {.mfb
+ nop.m 0
+ // can be combined with bundle above if sign of 0 or
+ // FTZ enabled are not important
+ (p7) fmerge.s f8=f8,f9
+ // return if 4*|a|<|b| (estimated quotient < 1/4)
+ (p7) br.ret.spnt b0;;
+}
+ {.mfi
+ // f7=2^{-32}
+ setf.exp f7=r29
+ // set f8 to current a value | sign
+ fmerge.s f8=f8,f13
+ nop.i 0;;
+}
+
+
+ {.mfi
+ getf.exp r28=f6
+ // last step ? (q<2^{23}); p12 is set when it is not, i.e. another pass is needed
+ fcmp.lt.unc.s1 p0,p12=f6,f12
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r=a-b*q
+ fnma.s1 f6=f14,f11,f13
+ nop.i 0
+} {.mfi
+ // r2=23+bias
+ mov r2=0xffff+23
+ // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
+ fnma.s.s1 f15=f11,f15,f11
+ nop.i 0;;
+}
+ {.mmi
+ nop.m 0
+ cmp.eq p11,p14=r2,r28 // p11: exponent field of q equals 23+bias, p14: otherwise
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p11,p14
+ {.mfi
+ nop.m 0
+ // if exponent(q)=23, then new a (f13) = a-b*2^{23}
+ (p11) fnma.s1 f13=f12,f14,f13
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // second partial remainder a-b*q', kept in f13
+ (p14) fnma.s1 f13=f14,f15,f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r>0 iff q=RZ(a/b) and inexact
+ fcmp.gt.unc.s1 p8,p0=f6,f0
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // r<0 iff q'=RZ(a/b) and inexact
+ (p14) fcmp.lt.unc.s1 p9,p10=f6,f0
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p9
+ {.mfi
+ nop.m 0
+ // (p8) Q=q+(last iteration ? sticky bits:0)
+ // i.e. Q=q+q*x (x=2^{-32} or 0)
+ (p8) fma.s1 f11=f11,f7,f11
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // (p9) Q=q'+(last iteration ? sticky bits:0)
+ // i.e. Q=q'+q'*x (x=2^{-32} or 0)
+ (p9) fma.s1 f11=f15,f7,f15
+ nop.i 0;;
+}
+
+ {.mfb
+ nop.m 0
+ // (p9) f13 already holds a-b*q' (new a, if not last iteration)
+ // (p10) new a =r
+ (p10) mov f13=f6
+ (p12) br.cond.sptk L(remloop24);;
+}
+
+// last iteration
+ {.mfi
+ nop.m 0
+ // set f9=|b|*sgn(a)
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // round to integer
+ fcvt.fx.s1 f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // save sign of a
+ fmerge.s f7=f8,f8
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // normalize
+ fcvt.xf f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // This can be removed if sign of 0 is not important
+ // get remainder using sf1
+ fnma.d.s1 f12=f9,f11,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // get remainder
+ fnma.d.s0 f8=f9,f11,f8
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // f12=0?
+ // This can be removed if sign of 0 is not important
+ fcmp.eq.unc.s1 p8,p0=f12,f0
+ nop.i 0;;
+}
+ {.mfb
+ nop.m 0
+ // if f8=0, set sign correctly
+ // This can be removed if sign of 0 is not important
+ (p8) fmerge.s f8=f7,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+L(FREM_X_NAN_INF):
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt L(FREM_Y_ZERO);;
+}
+
+// X infinity? Return QNAN indefinite (p8 is narrowed below to exclude Y NaN)
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8,p0 = f8, 0x23
+ nop.i 999
+}
+// X infinity? p11 selects the branch to the error-support call below
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11,p0 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ? p8 stays set only when X is infinite and Y is not a NaN
+{.mfi
+ nop.m 999
+(p8) fclass.m.unc p0,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p7 = f8,f8
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+(p11) mov f10=f8 // FR_X for the error call; NOTE(review): when p8 is set f8 was already overwritten by the frcpa above - confirm intended
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) fma.d f8=f8,f1,f0
+ nop.i 0 ;;
+}
+
+{ .mfb
+ nop.m 999
+ frcpa.s0 f8,p7=f8,f9
+ (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
+}
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.ret.spnt b0 ;;
+}
+
+
+L(FREM_Y_NAN_INF_ZERO):
+
+// Y INF: return a (passed through a double precision fma)
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.d f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN? return b (passed through a double precision fma)
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.d f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+L(FREM_Y_ZERO):
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+// X NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa f11,p7=f8,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa f11,p7 = f0,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8, f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d f8=f11,f1,f0
+ nop.i 999
+}
+
+
+L(EXP_ERROR_RETURN):
+
+{ .mib
+(p0) mov GR_Parameter_TAG = 124 // tag argument (r40) for the error-support call
+ nop.i 999
+(p0) br.sptk __libm_error_region;;
+}
+
+.endp remainder
+ASM_SIZE_DIRECTIVE(remainder)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__remainder)
+#endif
+
+
+
+.proc __libm_error_region // Common error exit: spills x, y and the result to the stack and calls __libm_error_support
+__libm_error_region: // Entered by branch (not call) with GR_Parameter_TAG already set by the branching code
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32 once the frame below exists
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Store y (double) at sp+32; post-increment leaves the pointer at sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0 (still the return address of the math function)
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 (x) on stack at sp+16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48, the post-incremented pointer)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) on stack at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point Parameter 2 back at the y slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address; the register was overwritten across the call
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Reload the result slot (sp+48) into f8; presumably updated by the handler
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return through the b0 saved on entry
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S
new file mode 100644
index 0000000..e3b8b8a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_remainderf.S
@@ -0,0 +1,611 @@
+ .file "remainderf.asm"
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab,
+// Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//====================================================================
+// 2/02/00 Initial version
+// 3/02/00 New algorithm
+// 4/04/00 Unwind support added
+// 7/21/00 Fixed quotient=2^{24*m+23} bug
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//11/29/00 Set FR_Y to f9
+//
+// API
+//====================================================================
+// float remainderf(float,float);
+//
+// Overview of operation
+//====================================================================
+// remainder(a,b)=a-i*b,
+// where i is an integer such that, if b!=0 and a is finite,
+// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
+//
+// Algorithm
+//====================================================================
+// a). eliminate special cases
+// b). if |a/b|<0.25 (first quotient estimate), return a
+// c). use single precision divide algorithm to get quotient q
+// rounded to 24 bits of precision
+// d). calculate partial remainders (using both q and q-ulp);
+// select one and RZ(a/b) based on the sign of |a|-|b|*q
+// e). if the exponent difference (exponent(a)-exponent(b))
+// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
+// and sticky bits to round to integer; exit loop and
+// calculate final remainder
+// f). if exponent(a)-exponent(b)>=24, select new value of a as
+// the partial remainder calculated using RZ(a/b);
+// repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+
+#include "libm_support.h"
+
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p12
+// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15
+//
+
+.section .text
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+ .proc remainderf#
+ .align 32
+ .global remainderf#
+ .align 32
+
+remainderf:
+#ifdef _LIBC
+.global __remainderf
+.type __remainderf,@function
+__remainderf:
+#endif
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f13=|a|
+ fmerge.s f13=f0,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // f14=|b|
+ fmerge.s f14=f0,f9
+ nop.i 0;;
+}
+ {.mlx
+ nop.m 0
+ // r3=2^{24}-2 (single precision bit pattern)
+ movl r3=0x4b7ffffe;;
+}
+
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11,p0 = f9, 0xe7
+ nop.i 999
+}
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999;;
+}
+
+{.mfi
+ nop.m 0
+ mov f15=f0
+ nop.i 0
+}
+{ .mfi
+ // set p7=1
+ cmp.eq.unc p7,p0=r0,r0
+ // Step (1)
+ // y0 = 1 / b in f10
+ frcpa.s1 f10,p6=f13,f14
+ nop.i 0;;
+}
+{.bbb
+ (p9) br.cond.spnt L(FREM_X_NAN_INF)
+ (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
+ nop.b 0
+} {.mfi
+ nop.m 0
+ // set D flag if a (f8) is denormal
+ fnma.s0 f6=f8,f1,f8
+ nop.i 0;;
+}
+
+.align 32
+L(remloop24):
+ { .mfi
+ // f12=2^{24}-2
+ setf.s f12=r3
+ // Step (2)
+ // q0 = a * y0 in f15
+ (p6) fma.s1 f15=f13,f10,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (3)
+ // e0 = 1 - b * y0 in f7
+ (p6) fnma.s1 f7=f14,f10,f1
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ // r2=1.25*2^{-24}
+ movl r2=0x33a00000;;
+}
+ { .mfi
+ nop.m 0
+ // Step (4)
+ // q1 = q0 + e0 * q0 in f6
+ (p6) fma.s1 f6=f7,f15,f15
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (5)
+ // e1 = e0 * e0 in f7
+ (p6) fma.s1 f7=f7,f7,f0
+ nop.i 0;;
+}
+ {.mii
+ (p7) getf.exp r29=f15
+ (p7) mov r28=0xfffd
+ nop.i 0;;
+}
+
+ { .mfi
+ // f15=1.25*2^{-24}
+ setf.s f15=r2
+ // Step (6)
+ // q2 = q1 + e1 * q1 in f6
+ (p6) fma.s1 f6=f7,f6,f6
+ nop.i 0
+}
+{ .mfi
+ mov r2=0x3e7
+ // Step (7)
+ // e2 = e1 * e1 in f7
+ (p6) fma.s1 f7=f7,f7,f0
+ nop.i 0;;
+}
+
+ {.mmi
+ // q<1/4 ? (i.e. expon< -2)
+ (p7) cmp.gt.unc p7,p0=r28,r29
+ nop.m 0
+ // r2=0x3e7000000
+ shl r2=r2,24;;
+}
+
+{.mfb
+ // r2=0x3e7000001
+ add r2=1,r2
+ // if |a/b|<1/4, set D flag before returning
+ (p7) fma.s.s0 f9=f9,f0,f8
+ nop.b 0;;
+}
+ {.mfb
+ nop.m 0
+ // can be combined with bundle above if sign of 0 or
+ // FTZ enabled are not important
+ (p7) fmerge.s f8=f8,f9
+ // return if |a|<4*|b| (estimated quotient < 1/4)
+ (p7) br.ret.spnt b0;;
+}
+ {.mfi
+ nop.m 0
+ // set f8 to current a value | sign
+ fmerge.s f8=f8,f13
+ // r2=2^{-24}+2^{-48} (double prec.)
+ shl r2=r2,28;;
+}
+
+
+{ .mfi
+ // r29= -32+bias
+ mov r29=0xffdf
+ // Step (8)
+ // q3 = q2 + e2 * q2 in f6
+ (p6) fma.d.s1 f6=f7,f6,f6
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+ // Step (9)
+ // q = q3 in f11
+ (p6) fma.s.s1 f11=f6,f1,f0
+ nop.i 0;;
+}
+ {.mfi
+ // f7=2^{-24}
+ setf.d f7=r2
+ // last step ? (q3<2^{24}-2 --> q<2^{24})
+ fcmp.lt.unc.s1 p0,p12=f6,f12
+ nop.i 0
+} {.mfi
+ // f12=2^{-32}
+ setf.exp f12=r29
+ nop.f 0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r=a-b*q
+ fnma.s1 f6=f14,f11,f13
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
+ fnma.s.s1 f15=f11,f15,f11
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // r2=a-b*q'
+ fnma.s1 f13=f14,f15,f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r>0 iff q=RZ(a/b) and inexact
+ fcmp.gt.unc.s1 p8,p0=f6,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r<0 iff q'=RZ(a/b) and inexact
+ fcmp.lt.unc.s1 p9,p10=f6,f0
+ nop.i 0;;
+}
+.pred.rel "mutex",p8,p9
+ {.mfi
+ nop.m 0
+ // (p8) Q=q+(last iteration ? sticky bits:0)
+ // i.e. Q=q+q*x (x=2^{-32} or 0)
+ (p8) fma.s1 f11=f11,f12,f11
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // (p9) Q=q'+(last iteration ? sticky bits:0)
+ // i.e. Q=q'+q'*x (x=2^{-24} or 0: if expon. difference=23, want to round back to q)
+ (p9) fma.s1 f11=f15,f7,f15
+ nop.i 0;;
+}
+
+ {.mfb
+ nop.m 0
+ // (p9) set r=r2 (new a, if not last iteration)
+ // (p10) new a =r
+ (p10) mov f13=f6
+ (p12) br.cond.sptk L(remloop24);;
+}
+
+// last iteration
+ {.mfi
+ nop.m 0
+ // set f9=|b|*sgn(a)
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // round to integer
+ fcvt.fx.s1 f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // save sign of a
+ fmerge.s f7=f8,f8
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // normalize
+ fcvt.xf f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // This can be removed if sign of 0 is not important
+ // get remainder using sf1
+ fnma.s.s1 f12=f9,f11,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // get remainder
+ fnma.s.s0 f8=f9,f11,f8
+ nop.i 0;;
+}
+
+
+
+ {.mfi
+ nop.m 0
+ // f12=0?
+ // This can be removed if sign of 0 is not important
+ fcmp.eq.unc.s1 p8,p0=f12,f0
+ nop.i 0;;
+}
+ {.mfb
+ nop.m 0
+ // if f8=0, set sign correctly
+ // This can be removed if sign of 0 is not important
+ (p8) fmerge.s f8=f7,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+L(FREM_X_NAN_INF):
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt L(FREM_Y_ZERO);;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8,p0 = f8, 0x23
+ nop.i 999
+}
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11,p0 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ?
+{.mfi
+ nop.m 999
+(p8) fclass.m.unc p0,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p7 = f8,f8
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+(p11) mov f10=f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s f8=f8,f1,f0
+ nop.i 0 ;;
+}
+
+{ .mfb
+ nop.m 999
+ frcpa.s0 f8,p7=f8,f9
+ (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
+}
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.ret.spnt b0 ;;
+}
+
+
+L(FREM_Y_NAN_INF_ZERO):
+
+// Y INF
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.s f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+L(FREM_Y_ZERO):
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+// X NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa f11,p7=f8,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa f11,p7 = f0,f0
+nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8, f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s f8=f11,f1,f0
+ nop.i 999
+}
+
+
+L(EXP_ERROR_RETURN):
+
+{ .mib
+(p0) mov GR_Parameter_TAG = 125
+ nop.i 999
+(p0) br.sptk __libm_error_region;;
+}
+
+.endp remainderf
+ASM_SIZE_DIRECTIVE(remainderf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__remainderf)
+#endif
+
+// __libm_error_region: spill both arguments and the result (single precision) into
+// a new 64-byte stack frame, call __libm_error_support, reload f8 and return.
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32; pointer post-increments to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address = sp+48
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support#;; // Call error handling function
+}
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Reload the result slot into f8
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S
new file mode 100644
index 0000000..7a46575
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_remainderl.S
@@ -0,0 +1,619 @@
+.file "remainderl.asm"
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//====================================================================
+// 2/02/00 Initial version
+// 3/02/00 New algorithm
+// 4/04/00 Unwind support added
+// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//11/29/00 Set FR_Y to f9
+//
+// API
+//====================================================================
+// long double remainderl(long double,long double);
+//
+// Overview of operation
+//====================================================================
+// remainder(a,b)=a-i*b,
+// where i is an integer such that, if b!=0 and a is finite,
+// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
+//
+// Algorithm
+//====================================================================
+// a). eliminate special cases
+// b). if |a/b|<0.25 (first quotient estimate), return a
+// c). use single precision divide algorithm to get quotient q
+// rounded to 24 bits of precision
+// d). calculate partial remainders (using both q and q-ulp);
+// select one and RZ(a/b) based on the sign of |a|-|b|*q
+// e). if the exponent difference (exponent(a)-exponent(b))
+// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
+// and sticky bits to round to integer; exit loop and
+// calculate final remainder
+// f). if exponent(a)-exponent(b)>=24, select new value of a as
+// the partial remainder calculated using RZ(a/b);
+// repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+
+#include "libm_support.h"
+
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p14
+// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15,f32
+//
+.section .text
+
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+
+
+ .proc remainderl#
+ .align 32
+ .global remainderl#
+ .align 32
+
+remainderl:
+#ifdef _LIBC
+.global __remainderl
+.type __remainderl,@function
+__remainderl:
+#endif
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f13=|a|
+ fmerge.s f13=f0,f8
+ nop.i 0
+}
+ {.mfi
+ getf.sig r29=f9
+ // f14=|b|
+ fmerge.s f14=f0,f9
+ nop.i 0;;
+}
+ {.mlx
+ mov r28=0x2ffdd
+ // r3=2^{23} (single precision bit pattern)
+ movl r3=0x4b000000;;
+}
+
+
+{.mmi
+setf.exp f32=r28
+nop.m 0
+// y pseudo-zero ?
+cmp.eq p11,p10=r29,r0;;
+}
+
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ nop.m 999
+(p10) fclass.m p11,p10 = f9, 0xe7
+ nop.i 999
+}
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p8 = f8, 0xe3
+ nop.i 999;;
+}
+
+{.mfi
+ nop.m 0
+ mov f12=f0
+ nop.i 0
+}
+{ .mfi
+ // set p7=1
+ cmp.eq.unc p7,p0=r0,r0
+ // Step (1)
+ // y0 = 1 / b in f10
+ frcpa.s1 f10,p6=f13,f14
+ nop.i 0;;
+}
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ nop.m 999
+ // pseudo-NaN ?
+(p10) fclass.nm p11,p0 = f9, 0xff
+ nop.i 999
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+(p8) fclass.nm p9,p0 = f8, 0xff
+ nop.i 999;;
+}
+
+{.bbb
+ (p9) br.cond.spnt L(FREM_X_NAN_INF)
+ (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
+ nop.b 0
+} {.mfi
+ nop.m 0
+ // set D flag if a (f8) is denormal
+ fnma.s0 f6=f8,f1,f8
+ nop.i 0;;
+}
+
+L(remloop24):
+ { .mfi
+ nop.m 0
+ // Step (2)
+ // q0 = a * y0 in f12
+ (p6) fma.s1 f12=f13,f10,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // e0 = 1 - b * y0 in f7
+ (p6) fnma.s1 f7=f14,f10,f1
+ nop.i 0;;
+} {.mlx
+ nop.m 0
+ // r2=1.25*2^{-24}
+ movl r2=0x33a00000;;
+}
+
+{.mfi
+ nop.m 0
+ // q1=q0*(1+e0)
+ fma.s1 f15=f12,f7,f12
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (4)
+ // e1 = e0 * e0 + E in f7
+ (p6) fma.s1 f7=f7,f7,f32
+ nop.i 0;;
+}
+ {.mii
+ (p7) getf.exp r29=f12
+ (p7) mov r28=0xfffd
+ nop.i 0;;
+}
+
+ { .mfi
+ // f12=2^{23}
+ setf.s f12=r3
+ // Step (5)
+ // q2 = q1 + e1 * q1 in f11
+ (p6) fma.s.s1 f11=f7,f15,f15
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // q2 = q1 + e1 * q1 in f6
+ (p6) fma.s1 f6=f7,f15,f15
+ nop.i 0;;
+}
+
+ {.mmi
+ // f15=1.25*2^{-24}
+ setf.s f15=r2
+ // q<1/4 ? (i.e. expon< -2)
+ (p7) cmp.gt p7,p0=r28,r29
+ nop.i 0;;
+}
+
+{.mfb
+ // r29= -32+bias
+ mov r29=0xffdf
+ // if |a/b|<1/4, set D flag before returning
+ (p7) fma.s0 f9=f9,f0,f8
+ nop.b 0;;
+}
+ {.mfb
+ nop.m 0
+ // can be combined with bundle above if sign of 0 or
+ // FTZ enabled are not important
+ (p7) fmerge.s f8=f8,f9
+ // return if |a|<4*|b| (estimated quotient < 1/4)
+ (p7) br.ret.spnt b0;;
+}
+ {.mfi
+ // f7=2^{-32}
+ setf.exp f7=r29
+ // set f8 to current a value | sign
+ fmerge.s f8=f8,f13
+ nop.i 0;;
+}
+ {.mfi
+ getf.exp r28=f6
+ // last step ? (q<2^{23})
+ fcmp.lt.unc.s1 p0,p12=f6,f12
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r=a-b*q
+ fnma.s1 f6=f14,f11,f13
+ nop.i 0
+} {.mfi
+ // r2=23+bias
+ mov r2=0xffff+23
+ // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
+ fnma.s.s1 f15=f11,f15,f11
+ nop.i 0;;
+}
+ {.mmi
+ nop.m 0
+ cmp.eq p11,p14=r2,r28
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p11,p14
+ {.mfi
+ nop.m 0
+ // if exp_q=2^23, then r=a-b*2^{23}
+ (p11) fnma.s1 f13=f12,f14,f13
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r2=a-b*q'
+ (p14) fnma.s1 f13=f14,f15,f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r>0 iff q=RZ(a/b) and inexact
+ fcmp.gt.unc.s1 p8,p0=f6,f0
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // r<0 iff q'=RZ(a/b) and inexact
+ (p14) fcmp.lt.unc.s1 p9,p10=f6,f0
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p9
+ {.mfi
+ nop.m 0
+ // (p8) Q=q+(last iteration ? sticky bits:0)
+ // i.e. Q=q+q*x (x=2^{-32} or 0)
+ (p8) fma.s1 f11=f11,f7,f11
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // (p9) Q=q'+(last iteration ? sticky bits:0)
+ // i.e. Q=q'+q'*x (x=2^{-32} or 0)
+ (p9) fma.s1 f11=f15,f7,f15
+ nop.i 0;;
+}
+
+ {.mfb
+ nop.m 0
+ // (p9) set r=r2 (new a, if not last iteration)
+ // (p10) new a =r
+ (p10) mov f13=f6
+ (p12) br.cond.sptk L(remloop24);;
+}
+
+// last iteration
+ {.mfi
+ nop.m 0
+ // set f9=|b|*sgn(a)
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // round to integer
+ fcvt.fx.s1 f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // save sign of a
+ fmerge.s f7=f8,f8
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // normalize
+ fcvt.xf f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // This can be removed if sign of 0 is not important
+ // get remainder using sf1
+ fnma.s1 f12=f9,f11,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // get remainder
+ fnma.s0 f8=f9,f11,f8
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // f12=0?
+ // This can be removed if sign of 0 is not important
+ fcmp.eq.unc.s1 p8,p0=f12,f0
+ nop.i 0;;
+}
+ {.mfb
+ nop.m 0
+ // if f8=0, set sign correctly
+ // This can be removed if sign of 0 is not important
+ (p8) fmerge.s f8=f7,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+
+L(FREM_X_NAN_INF):
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt L(FREM_Y_ZERO);;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8,p0 = f8, 0x23
+ nop.i 999
+}
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11,p0 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ?
+{.mfi
+ nop.m 999
+(p8) fclass.m.unc p0,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fnma.s0 f9=f9,f1,f9
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p7 = f8,f8
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+(p11) mov f10=f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) fma f8=f8,f1,f0
+ nop.i 0 ;;
+}
+
+{ .mfb
+ nop.m 999
+ frcpa.s0 f8,p7=f8,f9
+ (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
+}
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.ret.spnt b0 ;;
+}
+
+
+L(FREM_Y_NAN_INF_ZERO):
+// Y INF
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f9, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p0 = f9, 0xff
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+L(FREM_Y_ZERO):
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+// X NAN?
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa f11,p7=f8,f0
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 999
+(p10) frcpa f11,p7 = f0,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8, f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma f8=f11,f1,f0
+ nop.i 999;;
+}
+
+L(EXP_ERROR_RETURN):
+
+{ .mib
+(p0) mov GR_Parameter_TAG = 123
+ nop.i 999
+(p0) br.sptk __libm_error_region;;
+}
+
+.endp remainderl
+ASM_SIZE_DIRECTIVE(remainderl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__remainderl)
+#endif
+// Spill arguments and result to a 64-byte frame, call __libm_error_support, reload f8.
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 = new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32; pointer post-increments to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address = sp+48
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Reload the result slot into f8
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S
new file mode 100644
index 0000000..60be3b3
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_scalb.S
@@ -0,0 +1,551 @@
+.file "scalb.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 Scalb completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double = scalb (double x, double n)
+// input floating point f8 and floating point f9
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Floating_X = f8
+FR_Result = f8
+FR_Floating_N = f9
+FR_Result2 = f9
+FR_Norm_N = f10
+FR_Result3 = f11
+FR_Norm_X = f12
+FR_N_float_int = f13
+FR_Two_N = f14
+FR_Two_to_Big = f15
+FR_Big = f6
+FR_NBig = f7
+
+GR_N_Biased = r15
+GR_Big = r16
+GR_NBig = r17
+GR_Scratch = r18
+GR_Scratch1 = r19
+GR_Bias = r20
+GR_N_as_int = r21
+
+GR_SAVE_B0 = r32
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.align 32
+.global scalb
+
+.section .text
+.proc scalb
+.align 32
+
+scalb:
+#ifdef _LIBC
+.global __ieee754_scalb
+.type __ieee754_scalb,@function
+__ieee754_scalb:
+#endif
+
+//
+// Is x NAN, INF, ZERO, +-?
+//
+{ .mfi
+ alloc r32=ar.pfs,0,3,4,0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Scratch = 0x019C3F,r0
+}
+//
+// Is y a NAN, INF, ZERO, +-?
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Scratch1 = 0x063BF,r0
+}
+;;
+
+//
+// Convert N to a fp integer
+// Normalize x
+//
+{ .mfi
+ nop.m 0
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ nop.i 999
+};;
+
+//
+// Create 2**big
+// Create 2**-big
+// Normalize x
+// Branch on special values.
+//
+{ .mib
+ setf.exp FR_Big = GR_Scratch
+ nop.i 0
+(p6) br.cond.spnt L(SCALB_NAN_INF_ZERO)
+}
+{ .mib
+ setf.exp FR_NBig = GR_Scratch1
+ nop.i 0
+(p7) br.cond.spnt L(SCALB_NAN_INF_ZERO)
+};;
+
+//
+// Convert N to a fp integer
+// Create -35000
+//
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
+ addl GR_NBig = -35000,r0
+}
+;;
+
+//
+// Put N in a GP register
+// Convert N_float_int to floating point value
+// Create 35000
+// Build the exponent Bias
+//
+{ .mii
+ getf.sig GR_N_as_int = FR_N_float_int
+ shl GR_Scratch = GR_Scratch,63
+ addl GR_Big = 35000,r0
+}
+{ .mfi
+ addl GR_Bias = 0x0FFFF,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Catch those fp values that are beyond 2**64-1
+// Is N > 35000
+// Is N < -35000
+//
+{ .mfi
+ cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
+ nop.f 0
+ nop.i 0
+}
+{ .mmi
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+ nop.i 0
+};;
+
+//
+// Is N really an int, only for those non-int indefinites?
+// Create exp bias.
+//
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Branch and return if N is not an int.
+// Main path, create 2**N
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.i 999
+}
+{ .mfb
+ nop.m 0
+(p7) frcpa f8,p11 = f0,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Set denormal on denormal input x and denormal input N
+//
+{ .mfi
+ nop.m 999
+(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 999
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
+ nop.i 0
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x00000000000303FF
+};;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x00000000000103FF
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999
+};;
+
+//
+// Do final operation
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflow)
+// S3 user supplied status + FZ + TD (Underflow)
+//
+//
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+};;
+
+//
+// Is the result zero?
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 53, r0
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+ nop.i 999
+};;
+
+//
+// Is result bigger than the allowed range?
+// Branch out for underflow
+//
+{ .mfb
+(p6) addl GR_Tag = 54, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt L(SCALB_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(SCALB_OVERFLOW)
+(p9) br.cond.spnt L(SCALB_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+L(SCALB_NAN_INF_ZERO):
+
+//
+// Convert N to a fp integer
+//
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63
+};;
+{ .mfi
+ nop.m 0
+ fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
+ nop.i 0
+}
+ { .mfi
+ nop.m 0
+ fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
+ nop.i 0
+};;
+
+//
+// Either X or N is a NaN, return result and possibly raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p6) br.ret.spnt b0
+};;
+{ .mfb
+ getf.sig GR_N_as_int = FR_N_float_int
+(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// If N is +Inf, return x*N
+// For N = -Inf, negate N to create +Inf
+//
+{ .mfb
+ nop.m 0
+(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 0
+(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+};;
+
+//
+// If N==-Inf,return x/(-N)
+//
+{ .mfb
+ nop.m 0
+(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+};;
+
+//
+// Convert N_float_int to floating point value
+//
+{ .mfi
+ cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Is N an integer.
+//
+{ .mfi
+ nop.m 0
+(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+};;
+
+//
+// If N not an int, return NaN and raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p6 = f0,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Always return x in other path.
+//
+{ .mfb
+ nop.m 0
+ fma.d.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+};;
+
+.endp scalb
+ASM_SIZE_DIRECTIVE(scalb)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_scalb)
+#endif
+.proc __libm_error_region
+__libm_error_region:
+// Shared error exit for scalb: the overflow and underflow paths both branch here.
+L(SCALB_OVERFLOW):
+L(SCALB_UNDERFLOW):
+
+//
+// Get stack address of N (old sp-32, i.e. sp+32 in the new frame)
+//
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp: open a 64-byte frame and save gp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store normalized N at sp+32 (pointer post-increments to sp+48)
+// Locate the address of x on stack (sp+16) and save b0
+//
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Norm_N,16
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store normalized x on the stack, then the result at sp+48.
+// Get address for result on stack and point GR_Parameter_Y back at N.
+//
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_Result
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Recompute location of result on stack (sp+48) after the call
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result, pop the frame and restore b0
+//
+{ .mmi
+ ldfd FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S
new file mode 100644
index 0000000..d4dfe5e
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_scalbf.S
@@ -0,0 +1,551 @@
+.file "scalbf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 Scalb completely reworked and now standalone version
+//
+// API
+//==============================================================
+// float = scalbf (float x, float n)
+// input floating point f8 and floating point f9
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Floating_X = f8 // input x
+FR_Result = f8 // returned value; shares f8 with x
+FR_Floating_N = f9 // input n
+FR_Result2 = f9 // product computed under status field s2; reuses f9
+FR_Norm_N = f10 // n after fnorm
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // x after fnorm
+FR_N_float_int = f13 // n truncated to an integer, then converted back to fp
+FR_Two_N = f14 // scale factor 2**n
+FR_Two_to_Big = f15 // not referenced by the code in this file
+FR_Big = f6 // 2**40000 at first, later the positive overflow limit
+FR_NBig = f7 // 2**-40000 at first, later the negative overflow limit
+
+GR_N_Biased = r15 // n plus the exponent bias
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18
+GR_Scratch1 = r19
+GR_Bias = r20 // 0xFFFF
+GR_N_as_int = r21 // n as a 64-bit integer
+
+GR_SAVE_B0 = r32 // r32-r34: locals from alloc r32=ar.pfs,0,3,4,0
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35 // r35-r38: output registers set up for __libm_error_support
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.align 32
+.global scalbf
+
+.section .text
+.proc scalbf
+.align 32
+
+scalbf:
+#ifdef _LIBC
+.global __ieee754_scalbf
+.type __ieee754_scalbf,@function
+__ieee754_scalbf:
+#endif
+
+// scalbf(x in f8, n in f9) returns x * 2**n in f8.  Entry classification:
+// p7 = x is NaN, Inf or zero; GR_Scratch = biased exponent of 2**40000
+//
+{ .mfi
+ alloc r32=ar.pfs,0,3,4,0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Scratch = 0x019C3F,r0
+}
+//
+// p6 = n is NaN, Inf or zero; GR_Scratch1 = biased exponent of 2**-40000
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Scratch1 = 0x063BF,r0
+}
+;;
+
+//
+// Normalize N
+// Normalize x
+//
+{ .mfi
+ nop.m 0
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ nop.i 999
+};;
+
+//
+// Create 2**big (FR_Big = 2**40000)
+// Create 2**-big (FR_NBig = 2**-40000)
+// Branch to the special-value path if either x or N is NaN, Inf or zero
+// (p6 or p7 from the fclass tests above).
+//
+{ .mib
+ setf.exp FR_Big = GR_Scratch
+ nop.i 0
+(p6) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
+}
+{ .mib
+ setf.exp FR_NBig = GR_Scratch1
+ nop.i 0
+(p7) br.cond.spnt L(SCALBF_NAN_INF_ZERO)
+};;
+
+//
+// Truncate N to a 64-bit integer (kept in an FP register); GR_Scratch = 1
+// Create -35000
+//
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
+ addl GR_NBig = -35000,r0
+}
+;;
+
+//
+// Put N in a GP register; GR_Scratch = 1 << 63
+// Convert N_float_int back to a floating point value
+// Create 35000
+// Build the exponent Bias
+//
+{ .mii
+ getf.sig GR_N_as_int = FR_N_float_int
+ shl GR_Scratch = GR_Scratch,63
+ addl GR_Big = 35000,r0
+}
+{ .mfi
+ addl GR_Bias = 0x0FFFF,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Catch fp values of N outside the integer range: p10 = (N_as_int == 1 << 63)
+// Is N >= 35000
+// Is N <= -35000
+//
+{ .mfi
+ cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
+ nop.f 0
+ nop.i 0
+}
+{ .mmi
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+ nop.i 0
+};;
+
+//
+// Is N really an integer? (checked only when the conversion was in range, p9)
+// Create the biased exponent for 2**N.
+//
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Return NaN (0/0 via frcpa) if N is not an integer.
+// Main path, create 2**N
+// NOTE(review): first bundle below has no F-slot instruction; confirm the assembler pads it.
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.i 999
+}
+{ .mfb
+ nop.m 0
+(p7) frcpa f8,p11 = f0,f0
+(p7) br.ret.spnt b0
+};;
+
+// If N was out of integer range (p10), choose p6/p8 from the sign of N.
+// The two s0 compares set the denormal flag on denormal input x or N
+//
+{ .mfi
+ nop.m 999
+(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 999
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
+ nop.i 0
+};;
+
+//
+// Replace 2**N by 2**40000 (p6) or 2**-40000 (p8) if N was very large or small
+//
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x000000000003007F // setf.exp encoding of -2**128 (bit 17 = sign)
+};;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x000000000001007F // setf.exp encoding of +2**128
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999
+};;
+
+//
+// Compute x * 2**N under s0 (returned), s3 and s2; load the overflow limits
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflow)
+// S3 user supplied status + FZ + TD (Underflow)
+//
+//
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+};;
+
+// p6 = the flush-to-zero (s3) result is zero
+// p7 = the wide-range (s2) result >= FR_Big, i.e. positive overflow
+// GR_Tag = 55 is the tag passed on the overflow branches
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 55, r0
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only (s0 result != s2 result)
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+ nop.i 999
+};;
+
+//
+// Is the result at or below the negative limit FR_NBig (negative overflow)?
+// Branch out for underflow with GR_Tag = 56
+//
+{ .mfb
+(p6) addl GR_Tag = 56, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt L(SCALBF_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(SCALBF_OVERFLOW)
+(p9) br.cond.spnt L(SCALBF_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+L(SCALBF_NAN_INF_ZERO):
+
+// Special-value path: x or N is NaN, Inf or zero.
+// Truncate N to an integer; p6 = N is NaN
+//
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63
+};;
+{ .mfi
+ nop.m 0
+ fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @pos | @inf
+ nop.i 0
+}
+ { .mfi
+ nop.m 0
+ fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @neg | @inf
+ nop.i 0
+};;
+
+//
+// Either X or N is a NaN: return N*x, possibly raising invalid.
+//
+{ .mfb
+ nop.m 0
+(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p6) br.ret.spnt b0
+};;
+{ .mfb
+ getf.sig GR_N_as_int = FR_N_float_int
+(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// If N is +Inf, return x*N
+// For N = -Inf, negate N to create +Inf
+//
+{ .mfb
+ nop.m 0
+(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 0
+(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+};;
+
+//
+// If N==-Inf,return x/(-N)
+//
+{ .mfb
+ nop.m 0
+(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+};;
+
+//
+// Convert N_float_int back to floating point; p9 = (N_as_int != 1 << 63)
+//
+{ .mfi
+ cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Is N an integer? (compared only when p9 is set)
+//
+{ .mfi
+ nop.m 0
+(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+};;
+
+//
+// If N not an int, return NaN and raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p6 = f0,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Always return x in other path.
+//
+{ .mfb
+ nop.m 0
+ fma.s.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+};;
+
+.endp scalbf
+ASM_SIZE_DIRECTIVE(scalbf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_scalbf)
+#endif
+.proc __libm_error_region
+__libm_error_region:
+
+L(SCALBF_OVERFLOW):
+L(SCALBF_UNDERFLOW):
+
+// Common overflow/underflow exit (GR_Tag was set by the caller path): spill N,
+// x and the result into a 64-byte frame and call __libm_error_support.
+// GR_Parameter_Y = old sp - 32, which is new sp + 32: the slot for N.
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Allocate the 64-byte frame (matches .fframe 64) and save gp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N (single) at sp + 32; the post-increment leaves GR_Parameter_Y = sp + 48
+// GR_Parameter_X = sp + 16 (slot for x); save b0 before the call
+//
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Norm_N,16
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x at sp + 16 and the computed result at sp + 48.
+// GR_Parameter_RESULT = sp + 48; GR_Parameter_Y is stepped back to sp + 32 (N).
+//
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_Result
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Recompute the result slot address (sp + 48) after the call
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Reload the result from the frame, release the frame, restore b0
+//
+{ .mmi
+ ldfs FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S
new file mode 100644
index 0000000..dd493fe
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_scalbl.S
@@ -0,0 +1,551 @@
+.file "scalbl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 Scalb completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double-extended = scalbl (double-extended x, double-extended n)
+// input floating point f8 and floating point f9
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Floating_X = f8 // input x
+FR_Result = f8 // returned value; shares f8 with x
+FR_Floating_N = f9 // input n
+FR_Result2 = f9 // product computed under status field s2; reuses f9
+FR_Norm_N = f10 // n after fnorm
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // x after fnorm
+FR_N_float_int = f13 // n truncated to an integer, then converted back to fp
+FR_Two_N = f14 // scale factor 2**n
+FR_Two_to_Big = f15 // not referenced by the code in this file
+FR_Big = f6 // 2**40000 at first, later the positive overflow limit
+FR_NBig = f7 // 2**-40000 at first, later the negative overflow limit
+
+GR_N_Biased = r15 // n plus the exponent bias
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18
+GR_Scratch1 = r19
+GR_Bias = r20 // 0xFFFF
+GR_N_as_int = r21 // n as a 64-bit integer
+
+GR_SAVE_B0 = r32 // r32-r34: locals from alloc r32=ar.pfs,0,3,4,0
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35 // r35-r38: output registers set up for __libm_error_support
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.align 32
+.global scalbl
+
+.section .text
+.proc scalbl
+.align 32
+
+scalbl:
+#ifdef _LIBC
+.global __ieee754_scalbl
+.type __ieee754_scalbl,@function
+__ieee754_scalbl:
+#endif
+
+// scalbl(x in f8, n in f9) returns x * 2**n in f8.  Entry classification:
+// p7 = x is NaN, Inf or zero; GR_Scratch = biased exponent of 2**40000
+//
+{ .mfi
+ alloc r32=ar.pfs,0,3,4,0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Scratch = 0x019C3F,r0
+}
+//
+// p6 = n is NaN, Inf or zero; GR_Scratch1 = biased exponent of 2**-40000
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Scratch1 = 0x063BF,r0
+}
+;;
+
+//
+// Normalize N
+// Normalize x
+//
+{ .mfi
+ nop.m 0
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ nop.i 999
+};;
+
+//
+// Create 2**big (FR_Big = 2**40000)
+// Create 2**-big (FR_NBig = 2**-40000)
+// Branch to the special-value path if either x or N is NaN, Inf or zero
+// (p6 or p7 from the fclass tests above).
+//
+{ .mib
+ setf.exp FR_Big = GR_Scratch
+ nop.i 0
+(p6) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
+}
+{ .mib
+ setf.exp FR_NBig = GR_Scratch1
+ nop.i 0
+(p7) br.cond.spnt L(SCALBL_NAN_INF_ZERO)
+};;
+
+//
+// Truncate N to a 64-bit integer (kept in an FP register); GR_Scratch = 1
+// Create -35000
+//
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
+ addl GR_NBig = -35000,r0
+}
+;;
+
+//
+// Put N in a GP register; GR_Scratch = 1 << 63
+// Convert N_float_int back to a floating point value
+// Create 35000
+// Build the exponent Bias
+//
+{ .mii
+ getf.sig GR_N_as_int = FR_N_float_int
+ shl GR_Scratch = GR_Scratch,63
+ addl GR_Big = 35000,r0
+}
+{ .mfi
+ addl GR_Bias = 0x0FFFF,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Catch fp values of N outside the integer range: p10 = (N_as_int == 1 << 63)
+// Is N >= 35000
+// Is N <= -35000
+//
+{ .mfi
+ cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
+ nop.f 0
+ nop.i 0
+}
+{ .mmi
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+ nop.i 0
+};;
+
+//
+// Is N really an integer? (checked only when the conversion was in range, p9)
+// Create the biased exponent for 2**N.
+//
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Return NaN (0/0 via frcpa) if N is not an integer.
+// Main path, create 2**N
+// NOTE(review): first bundle below has no F-slot instruction; confirm the assembler pads it.
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.i 999
+}
+{ .mfb
+ nop.m 0
+(p7) frcpa f8,p11 = f0,f0
+(p7) br.ret.spnt b0
+};;
+
+// If N was out of integer range (p10), choose p6/p8 from the sign of N.
+// The two s0 compares set the denormal flag on denormal input x or N
+//
+{ .mfi
+ nop.m 999
+(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 999
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
+ nop.i 0
+};;
+
+//
+// Replace 2**N by 2**40000 (p6) or 2**-40000 (p8) if N was very large or small
+//
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x0000000000033FFF // setf.exp encoding of -2**16384 (bit 17 = sign)
+};;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x0000000000013FFF // setf.exp encoding of +2**16384
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999
+};;
+
+//
+// Compute x * 2**N under s0 (returned), s3 and s2; load the overflow limits
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflow)
+// S3 user supplied status + FZ + TD (Underflow)
+//
+//
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+};;
+
+// p6 = the flush-to-zero (s3) result is zero
+// p7 = the wide-range (s2) result >= FR_Big, i.e. positive overflow
+// GR_Tag = 51 is the tag passed on the overflow branches
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 51, r0
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only (s0 result != s2 result)
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+ nop.i 999
+};;
+
+//
+// Is the result at or below the negative limit FR_NBig (negative overflow)?
+// Branch out for underflow with GR_Tag = 52
+//
+{ .mfb
+(p6) addl GR_Tag = 52, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt L(SCALBL_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(SCALBL_OVERFLOW)
+(p9) br.cond.spnt L(SCALBL_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+L(SCALBL_NAN_INF_ZERO):
+
+// Special-value path: x or N is NaN, Inf or zero.
+// Truncate N to an integer; p6 = N is NaN
+//
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63
+};;
+{ .mfi
+ nop.m 0
+ fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @pos | @inf
+ nop.i 0
+}
+ { .mfi
+ nop.m 0
+ fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @neg | @inf
+ nop.i 0
+};;
+
+//
+// Either X or N is a NaN: return N*x, possibly raising invalid.
+//
+{ .mfb
+ nop.m 0
+(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p6) br.ret.spnt b0
+};;
+{ .mfb
+ getf.sig GR_N_as_int = FR_N_float_int
+(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// If N is +Inf, return x*N
+// For N = -Inf, negate N to create +Inf
+//
+{ .mfb
+ nop.m 0
+(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 0
+(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+};;
+
+//
+// If N==-Inf,return x/(-N)
+//
+{ .mfb
+ nop.m 0
+(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+};;
+
+//
+// Convert N_float_int back to floating point; p9 = (N_as_int != 1 << 63)
+//
+{ .mfi
+ cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+};;
+
+//
+// Is N an integer? (compared only when p9 is set)
+//
+{ .mfi
+ nop.m 0
+(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+};;
+
+//
+// If N not an int, return NaN and raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p6 = f0,f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Always return x in other path.
+//
+{ .mfb
+ nop.m 0
+ fma.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+};;
+
+.endp scalbl
+ASM_SIZE_DIRECTIVE(scalbl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_scalbl)
+#endif
+.proc __libm_error_region
+__libm_error_region:
+
+L(SCALBL_OVERFLOW):
+L(SCALBL_UNDERFLOW):
+
+// Common overflow/underflow exit (GR_Tag was set by the caller path): spill N,
+// x and the result into a 64-byte frame and call __libm_error_support.
+// GR_Parameter_Y = old sp - 32, which is new sp + 32: the slot for N.
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Allocate the 64-byte frame (matches .fframe 64) and save gp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N (extended) at sp + 32; the post-increment leaves GR_Parameter_Y = sp + 48
+// GR_Parameter_X = sp + 16 (slot for x); save b0 before the call
+//
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Norm_N,16
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x at sp + 16 and the computed result at sp + 48.
+// GR_Parameter_RESULT = sp + 48; GR_Parameter_Y is stepped back to sp + 32 (N).
+//
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_Result
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Recompute the result slot address (sp + 48) after the call
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Reload the result from the frame, release the frame, restore b0
+//
+{ .mmi
+ ldfe FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinh.S b/sysdeps/ia64/fpu/e_sinh.S
new file mode 100644
index 0000000..a478f4e
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sinh.S
@@ -0,0 +1,1310 @@
+.file "sinh.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 10/12/00 Update to set denormal operand and underflow flags
+// 1/22/01 Fixed to set inexact flag for small args.
+//
+// API
+//==============================================================
+// double = sinh(double)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers:
+// r32 -> r47
+// predicate registers used:
+// p6 p7 p8 p9
+// floating-point registers used:
+// f9 -> f15; f32 -> f45;
+// f8 has input, then output
+//
+// Overview of operation
+//==============================================================
+// There are four paths
+// 1. |x| < 0.25 SINH_BY_POLY
+// 2. |x| < 32 SINH_BY_TBL
+// 3. |x| < 2^14 SINH_BY_EXP
+// 4. |x| >= 2^14 SINH_HUGE
+//
+// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
+// >= 1.0110001.... x 2^13
+// >= 11357.2166
+//
+// But for double we get infinity for x >= 408633ce8fb9f87e
+// >= 1.0110...x 2^9
+// >= +7.10476e+002
+//
+// And for single we get infinity for x >= 42b3a496
+// >= 1.0110... 2^6
+// >= 89.8215
+//
+// SAFE: If there is danger of overflow set SAFE to 0
+// NOT implemented: if there is danger of underflow, set SAFE to 0
+// SAFE for all paths listed below
+//
+// 1. SINH_BY_POLY
+// ===============
+// If |x| is less than the tiny threshold, then clear SAFE
+// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
+// register-biased, this is fc01
+// For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
+// If |x| < tiny threshold, set SAFE = 0
+//
+// 2. SINH_BY_TBL
+// =============
+// SAFE: SAFE is always 1 for TBL;
+//
+// 3. SINH_BY_EXP
+// ==============
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// r34 has N-1; 16382 is in register biased form, 0x13ffd
+// There is danger of double overflow if N-1 > 0x3fe
+// in register biased form, 0x103fd
+// Analogously, there is danger of single overflow if N-1 > 0x7e
+// in register biased form, 0x1007d
+// SAFE: If there is danger of overflow set SAFE to 0
+//
+// 4. SINH_HUGE
+// ============
+// SAFE: SAFE is always 0 for HUGE
+
+#include "libm_support.h"
+
+//
+// Register aliases (symbol equates); note that several names share one register
+//==============================================================
+sinh_FR_X = f44 // |x| (sign cleared with fmerge.s at entry)
+sinh_FR_X2 = f9
+sinh_FR_X4 = f10
+sinh_FR_SGNX = f40 // sign of x merged onto 1.0 (fmerge.s f8,f1)
+sinh_FR_all_ones = f45 // significand loaded from sinh_GR_all_ones
+sinh_FR_tmp = f42
+
+sinh_FR_Inv_log2by64 = f9
+sinh_FR_log2by64_lo = f11
+sinh_FR_log2by64_hi = f10
+
+sinh_FR_A1 = f9
+sinh_FR_A2 = f10
+sinh_FR_A3 = f11
+
+sinh_FR_Rcub = f12
+sinh_FR_M_temp = f13
+sinh_FR_R_temp = f13
+sinh_FR_Rsq = f13
+sinh_FR_R = f14
+
+sinh_FR_M = f38
+
+sinh_FR_B1 = f15
+sinh_FR_B2 = f32
+sinh_FR_B3 = f33
+
+sinh_FR_peven_temp1 = f34
+sinh_FR_peven_temp2 = f35
+sinh_FR_peven = f36
+
+sinh_FR_podd_temp1 = f34
+sinh_FR_podd_temp2 = f35
+sinh_FR_podd = f37
+
+sinh_FR_poly_podd_temp1 = f11
+sinh_FR_poly_podd_temp2 = f13
+sinh_FR_poly_peven_temp1 = f11
+sinh_FR_poly_peven_temp2 = f13
+
+sinh_FR_J_temp = f9
+sinh_FR_J = f10
+
+sinh_FR_Mmj = f39
+
+sinh_FR_N_temp1 = f11
+sinh_FR_N_temp2 = f12
+sinh_FR_N = f13
+
+sinh_FR_spos = f14
+sinh_FR_sneg = f15
+
+sinh_FR_Tjhi = f32
+sinh_FR_Tjlo = f33
+sinh_FR_Tmjhi = f34
+sinh_FR_Tmjlo = f35
+
+sinh_GR_mJ = r35
+sinh_GR_J = r36
+
+sinh_AD_mJ = r38
+sinh_AD_J = r39
+sinh_GR_all_ones = r40 // set to -1 at function entry
+
+sinh_FR_S_hi = f9
+sinh_FR_S_hi_temp = f10
+sinh_FR_S_lo_temp1 = f11
+sinh_FR_S_lo_temp2 = f12
+sinh_FR_S_lo_temp3 = f13
+
+sinh_FR_S_lo = f38
+sinh_FR_C_hi = f39
+
+sinh_FR_C_hi_temp1 = f10
+sinh_FR_Y_hi = f11
+sinh_FR_Y_lo_temp = f12
+sinh_FR_Y_lo = f13
+sinh_FR_SINH = f9
+
+sinh_FR_P1 = f14
+sinh_FR_P2 = f15
+sinh_FR_P3 = f32
+sinh_FR_P4 = f33
+sinh_FR_P5 = f34
+sinh_FR_P6 = f35
+
+sinh_FR_TINY_THRESH = f9
+
+sinh_FR_SINH_temp = f10
+sinh_FR_SCALE = f11
+
+sinh_FR_signed_hi_lo = f10
+
+
+GR_SAVE_PFS = r41 // r41-r43 lie in the local range of alloc r32 = ar.pfs,0,12,4,0
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+
+GR_Parameter_X = r44 // r44-r46 lie in the output range of that alloc
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+double_sinh_arg_reduction:
+ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
+ data8 0xB8AA3B295C17F0BC, 0x00004005 // ~ 64/ln(2), stored as significand, biased exponent
+ data8 0xB17217F7D1000000, 0x00003FF8 // ~ ln(2)/64, high part
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // ~ ln(2)/64, low part
+ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
+
+double_sinh_p_table:
+ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
+ data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // ~ 1/3!
+ data8 0x8888888888888412, 0x00003FF8 // ~ 1/5!
+ data8 0xD00D00D00D4D39F2, 0x00003FF2 // ~ 1/7!
+ data8 0xB8EF1D28926D8891, 0x00003FEC // ~ 1/9!
+ data8 0xD732377688025BE9, 0x00003FE5 // ~ 1/11!
+ data8 0xB08AF9AE78C1239F, 0x00003FDE // ~ 1/13!
+ASM_SIZE_DIRECTIVE(double_sinh_p_table)
+
+double_sinh_ab_table:
+ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // ~ 1/3! (presumably A1 - confirm against the load order)
+ data8 0x88888888884ECDD5, 0x00003FF8 // ~ 1/5!
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // ~ 1/7!
+ data8 0x8000000000000002, 0x00003FFE // ~ 1/2! (presumably B1 - confirm against the load order)
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // ~ 1/4!
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // ~ 1/6!
+ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
+
+double_sinh_j_table:
+ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
+ data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // ~ 2**(-32/64); third word looks like a low-order correction - confirm
+ data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+ data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+ data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+ data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+ data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+ data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+ data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+ data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+ data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+ data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+ data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+ data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+ data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+ data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+ data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+ data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+ data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+ data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+ data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+ data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+ data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // 2**0 = 1.0 (middle entry)
+ data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+ data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+ data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+ data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+ data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+ data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+ data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+ data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+ data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+ data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+ data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+ data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+ data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+ data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+ data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+ data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+ data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+ data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+ data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+ data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+ data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+ data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+ data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+ data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+ data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+ data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+ data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // ~ 2**(32/64); 65 entries in all
+ASM_SIZE_DIRECTIVE(double_sinh_j_table)
+
+.align 32
+.global sinh#
+
+.section .text
+.proc sinh#
+.align 32
+
+sinh:
+#ifdef _LIBC
+.global __ieee754_sinh
+.type __ieee754_sinh,@function
+__ieee754_sinh:
+#endif
+
+// X infinity or NAN?
+// Take invalid fault if enabled
+
+
+{ .mfi
+ alloc r32 = ar.pfs,0,12,4,0
+(p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
+ mov sinh_GR_all_ones = -1
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p6) fma.d.s0 f8 = f8,f1,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+// Put 0.25 in f9; the compare further down sets p7 when |x| is not below 0.25
+// Make constant that will generate inexact when squared
+{ .mlx
+ setf.sig sinh_FR_all_ones = sinh_GR_all_ones
+(p0) movl r32 = 0x000000000000fffd ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+(p0) fclass.m.unc p7,p0 = f8, 0x07 //@zero
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s sinh_FR_X = f0,f8
+(p7) br.ret.spnt b0 ;;
+}
+
+// Identify denormal operands.
+{ .mfi
+ nop.m 999
+ fclass.m.unc p10,p0 = f8, 0x09 // + denorm
+ nop.i 999
+};;
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x0a // - denorm
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s sinh_FR_SGNX = f8,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.sptk L(SINH_BY_TBL) ;;
+}
+
+
+L(SINH_BY_POLY):
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Set tiny_SAFE (p7) to 1(0) if answer is not tiny
+// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
+// commented out.
+//(p0) movl r32 = 0x000000000000fc01
+//(p0) setf.exp f10 = r32
+//(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
+// Here is essentially the algorithm for SINH_BY_POLY. Care is taken for the order
+// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
+// Note that ax = |x|
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
+// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
+// = sign * (ax + ax*p_odd + (ax*p_even))
+// = sign * (ax + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Get the values of P_x from the table (denormal inputs return first, see p10/p11 below)
+{ .mfb
+(p0) addl r34 = @ltoff(double_sinh_p_table), gp
+(p10) fma.d.s0 f8 = f8,f8,f8 // +denormal: return x + x*x (presumably to raise underflow/inexact -- confirm)
+(p10) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ ld8 r34 = [r34]
+(p11) fnma.d.s0 f8 = f8,f8,f8 // -denormal: return x - x*x
+(p11) br.ret.spnt b0
+}
+;;
+
+// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+ nop.m 999
+(p0) ldfe sinh_FR_P1 = [r34],16
+(p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_P2 = [r34],16 ;;
+(p0) ldfe sinh_FR_P3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_P4 = [r34],16 ;;
+(p0) ldfe sinh_FR_P5 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_P6 = [r34],16
+(p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
+ nop.i 999 ;;
+}
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999
+}
+
+// Calculate f8 = sign * (Y_hi + Y_lo)
+// Go to return
+{ .mfb
+ nop.m 999
+(p0) fma.d.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(SINH_BY_TBL):
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now.
+// Double-extended:
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010009 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(SINH_HUGE) ;;
+}
+
+// r32 = 1
+// r34 = N-1
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// sinh(x) = sinh(B+R)
+// = sinh(B)cosh(R) + cosh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j*log2/64)
+// sinh(B) = 1/2(e^B -e^-B)
+// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
+// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
+// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
+// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where p_odd uses the A coefficients and p_even uses the B coefficients
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// sinh(B) = S_hi + S_lo
+// cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9 = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0) adds r32 = 0x1,r0
+(p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1) = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+ nop.m 999
+(p0) movl r38 = 0x000000000000fffe ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
+(p0) ldfe sinh_FR_log2by64_hi = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mbb
+(p0) ldfe sinh_FR_log2by64_lo = [r34],16
+ nop.b 999
+ nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9 = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_sinh_ab_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// f38 = M = round-to-integer(x*Inv_log2by64)
+// sinh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+// and the floating point representation of M in sinh_FR_M
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_A1 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r35 = sinh_FR_M_temp
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) and r36 = 0x3f, r35 ;;
+}
+
+// Calculate R
+// f13 = f44 - f12*f10 = ax - M*log2by64_hi
+// f14 = f13 - f8*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_A2 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
+ nop.i 999
+}
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0) ldfe sinh_FR_A3 = [r34],16 ;;
+(p0) ldfe sinh_FR_B1 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_B2 = [r34],16 ;;
+(p0) ldfe sinh_FR_B3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl r34 = r36, 0x2 ;;
+(p0) sxt1 r37 = r34 ;;
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
+(p0) shr r36 = r37, 0x2 ;;
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0) sub r34 = r35, r36
+ nop.i 999 ;;
+(p0) shr r35 = r34, 0x6 ;;
+}
+
+{ .mii
+(p0) sub r40 = r38, r35
+(p0) adds r37 = 0x1, r35
+(p0) add r39 = r38, r35 ;;
+}
+
+// Get the address of the J table, add the offset,
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0) sub r34 = r35, r32
+(p0) addl r37 = @ltoff(double_sinh_j_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r37 = [r37]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
+ nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9 (biased exponent 0x10004 = 2^5); the fcmp below sets p7 when |x| >= 32
+// Go to EXP if |x| >= 32
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010004 ;;
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
+ nop.i 999
+}
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp sinh_FR_N_temp1 = r39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
+ nop.i 999
+}
+
+// sinh_GR_mJ contains the table offset for -j
+// sinh_GR_J contains the table offset for +j
+// NOTE(review): the original comment said "p6 is true when j <= 0", but nothing below sets p6 from j
+
+{ .mlx
+(p0) setf.exp sinh_FR_N_temp2 = r40
+(p0) movl r40 = 0x0000000000000020 ;;
+}
+
+{ .mfi
+(p0) sub sinh_GR_mJ = r40, r36
+(p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1
+(p0) adds sinh_GR_J = 0x20, r36 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;;
+(p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
+(p0) shl sinh_GR_J = sinh_GR_J, 5 ;;
+}
+
+{ .mfi
+(p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
+(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
+(p0) add sinh_AD_J = r37, sinh_GR_J ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
+(p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1
+(p7) br.cond.spnt L(SINH_BY_EXP) ;;
+}
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate S_hi and S_lo
+// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
+ nop.i 999
+}
+
+// Calculate C_hi
+// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+// sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1 )
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
+ nop.i 999
+}
+
+// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
+// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
+ nop.i 999 ;;
+}
+
+/////////// BUG FIX fma to fms -TK
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
+ nop.i 999 ;;
+}
+
+// Y_hi = S_hi
+// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
+// sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+// sinh_FR_SINH = Y_hi + Y_lo
+// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.d.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(SINH_BY_EXP):
+
+// When p7 is true, we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag; it is recomputed below as (N-1 <= 0x3fe)
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
+// Scale = sign * 2^(N-1)
+// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
+// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
+ nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow if N-1 > 0x3fe = 1022
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x00000000000003fe ;;
+}
+
+{ .mfi
+(p0) cmp.gt.unc p0,p7 = r34, r32
+(p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
+ nop.i 999 ;;
+}
+
+// f8 = answer = scale * (Y_hi + Y_lo)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
+ nop.i 999 ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p7) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = f44,f44
+(p7) br.ret.sptk b0 ;;
+}
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
+ nop.i 999 ;;
+}
+
+// 103FF => 103FF -FFFF = 400(true)
+// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
+// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
+// largest double in register bias
+// Now set p8 if the answer with WRE is greater than or equal this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000103FF ;;
+}
+
+{ .mmf
+ nop.m 999
+(p0) setf.exp f41 = r32
+(p0) fsetc.s2 0x7F,0x40 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f42 = f41, f41
+ nop.i 999 ;;
+}
+
+// The error tag for overflow is 127
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p8) mov r47 = 127 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
+(p8) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov r47 = 127
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f44,f44
+(p0) br.ret.sptk b0 ;;
+}
+
+L(SINH_HUGE):
+
+// for SINH_HUGE, put 24000 in exponent (0x15dbf - 0xffff = 24000); take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+// No branch follows the last bundle: execution runs on into L(SINH_ERROR_SUPPORT) with r47 = 127
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000015dbf ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s0 f44 = sinh_FR_signed_hi_lo, f9, f0
+(p0) mov r47 = 127
+}
+.endp sinh
+ASM_SIZE_DIRECTIVE(sinh)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_sinh)
+#endif
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
+L(SINH_ERROR_SUPPORT):
+.prologue
+// Error path: builds a 64-byte frame holding x, f0 as parameter 2 and the result in f44, calls __libm_error_support, then returns the result slot in f8.
+// (1) Save ar.pfs and gp, reserve 64 bytes of stack
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Store parameter 2, form the parameter 1 address, save b0
+{ .mmi
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (f0) on stack; GR_Parameter_Y advances by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store parameter 1 and the result, then call __libm_error_support
+{ .mib
+ stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f44 // STORE Parameter 3 (f44, the computed result) on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+// (4) Reload the result into f8, pop the frame, restore b0/gp/ar.pfs and return
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhf.S b/sysdeps/ia64/fpu/e_sinhf.S
new file mode 100644
index 0000000..9b801d3
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sinhf.S
@@ -0,0 +1,1311 @@
+.file "sinhf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 10/12/00 Update to set denormal operand and underflow flags
+// 1/22/01 Fixed to set inexact flag for small args.
+//
+// API
+//==============================================================
+// float = sinhf(float)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers:
+// r32 -> r47
+// predicate registers used:
+// p6 p7 p8 p9 p10 p11
+// floating-point registers used:
+// f9 -> f15; f32 -> f45;
+// f8 has input, then output
+//
+// Overview of operation
+//==============================================================
+// There are four paths
+// 1. |x| < 0.25 SINH_BY_POLY
+// 2. |x| < 32 SINH_BY_TBL
+// 3. |x| < 2^14 SINH_BY_EXP
+// 4. |x| >= 2^14 SINH_HUGE
+//
+// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
+// >= 1.0110001.... x 2^13
+// >= 11357.2166
+//
+// But for double we get infinity for x >= 408633ce8fb9f87e
+// >= 1.0110...x 2^9
+// >= +7.10476e+002
+//
+// And for single we get infinity for x >= 42b3a496
+// >= 1.0110... 2^6
+// >= 89.8215
+//
+// SAFE: If there is danger of overflow set SAFE to 0
+// NOT implemented: if there is danger of underflow, set SAFE to 0
+// SAFE for all paths listed below
+//
+// 1. SINH_BY_POLY
+// ===============
+// If |x| is less than the tiny threshold, then clear SAFE
+// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
+// register-biased, this is fc01
+// For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
+// If |x| < tiny threshold, set SAFE = 0
+//
+// 2. SINH_BY_TBL
+// =============
+// SAFE: SAFE is always 1 for TBL;
+//
+// 3. SINH_BY_EXP
+// ==============
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// r34 has N-1; 16382 is in register biased form, 0x13ffd
+// There is danger of double overflow if N-1 > 0x3fe
+// in register biased form, 0x103fd
+// Analogously, there is danger of single overflow if N-1 > 0x7e
+// in register biased form, 0x1007d
+// SAFE: If there is danger of overflow set SAFE to 0
+//
+// 4. SINH_HUGE
+// ============
+// SAFE: SAFE is always 0 for HUGE
+//
+
+#include "libm_support.h"
+
+// Assembly macros: symbolic register names. Several names share one register (f9 = X2, Inv_log2by64, A1, J_temp, S_hi, SINH, TINY_THRESH), so only one of them can be live at a time.
+//==============================================================
+sinh_FR_X = f44 // |x|
+sinh_FR_X2 = f9
+sinh_FR_X4 = f10
+sinh_FR_SGNX = f40 // 1.0 carrying the sign of x
+sinh_FR_all_ones = f45 // all-ones significand; squared to raise inexact
+sinh_FR_tmp = f42
+
+sinh_FR_Inv_log2by64 = f9
+sinh_FR_log2by64_lo = f11
+sinh_FR_log2by64_hi = f10
+
+sinh_FR_A1 = f9
+sinh_FR_A2 = f10
+sinh_FR_A3 = f11
+
+sinh_FR_Rcub = f12
+sinh_FR_M_temp = f13
+sinh_FR_R_temp = f13
+sinh_FR_Rsq = f13
+sinh_FR_R = f14
+
+sinh_FR_M = f38
+
+sinh_FR_B1 = f15
+sinh_FR_B2 = f32
+sinh_FR_B3 = f33
+
+sinh_FR_peven_temp1 = f34
+sinh_FR_peven_temp2 = f35
+sinh_FR_peven = f36
+
+sinh_FR_podd_temp1 = f34
+sinh_FR_podd_temp2 = f35
+sinh_FR_podd = f37
+
+sinh_FR_poly_podd_temp1 = f11
+sinh_FR_poly_podd_temp2 = f13
+sinh_FR_poly_peven_temp1 = f11
+sinh_FR_poly_peven_temp2 = f13
+
+sinh_FR_J_temp = f9
+sinh_FR_J = f10
+
+sinh_FR_Mmj = f39
+
+sinh_FR_N_temp1 = f11
+sinh_FR_N_temp2 = f12
+sinh_FR_N = f13
+
+sinh_FR_spos = f14
+sinh_FR_sneg = f15
+
+sinh_FR_Tjhi = f32
+sinh_FR_Tjlo = f33
+sinh_FR_Tmjhi = f34
+sinh_FR_Tmjlo = f35
+
+sinh_GR_mJ = r35
+sinh_GR_J = r36
+
+sinh_AD_mJ = r38
+sinh_AD_J = r39
+sinh_GR_all_ones = r40 // loaded with -1 at entry
+
+sinh_FR_S_hi = f9
+sinh_FR_S_hi_temp = f10
+sinh_FR_S_lo_temp1 = f11
+sinh_FR_S_lo_temp2 = f12
+sinh_FR_S_lo_temp3 = f13
+
+sinh_FR_S_lo = f38
+sinh_FR_C_hi = f39
+
+sinh_FR_C_hi_temp1 = f10
+sinh_FR_Y_hi = f11
+sinh_FR_Y_lo_temp = f12
+sinh_FR_Y_lo = f13
+sinh_FR_SINH = f9
+
+sinh_FR_P1 = f14
+sinh_FR_P2 = f15
+sinh_FR_P3 = f32
+sinh_FR_P4 = f33
+sinh_FR_P5 = f34
+sinh_FR_P6 = f35
+
+sinh_FR_TINY_THRESH = f9
+
+sinh_FR_SINH_temp = f10
+sinh_FR_SCALE = f11
+
+sinh_FR_signed_hi_lo = f10
+
+
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+
+GR_Parameter_X = r44
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+double_sinh_arg_reduction:
+ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
+ data8 0xB8AA3B295C17F0BC, 0x00004005 // 64/ln(2)
+ data8 0xB17217F7D1000000, 0x00003FF8 // ln(2)/64, high part (low 24 significand bits are zero)
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // ln(2)/64, low part
+ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
+
+double_sinh_p_table:
+ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
+ data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // P1 ~ 1/3!
+ data8 0x8888888888888412, 0x00003FF8 // P2 ~ 1/5!
+ data8 0xD00D00D00D4D39F2, 0x00003FF2 // P3 ~ 1/7!
+ data8 0xB8EF1D28926D8891, 0x00003FEC // P4 ~ 1/9!
+ data8 0xD732377688025BE9, 0x00003FE5 // P5 ~ 1/11!
+ data8 0xB08AF9AE78C1239F, 0x00003FDE // ~ 1/13! (presumably P6 -- confirm against the sixth load in sinhf)
+ASM_SIZE_DIRECTIVE(double_sinh_p_table)
+
+double_sinh_ab_table:
+ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // ~ 1/3! (presumably A1 -- confirm against the loads in sinhf)
+ data8 0x88888888884ECDD5, 0x00003FF8 // ~ 1/5! (presumably A2)
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // ~ 1/7! (presumably A3)
+ data8 0x8000000000000002, 0x00003FFE // ~ 1/2! (presumably B1)
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // ~ 1/4! (presumably B2)
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // ~ 1/6! (presumably B3)
+ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
+
+double_sinh_j_table:
+ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
+ data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // 2^(-32/64); each row is four data8 words (32 bytes): high part in words 0-1, presumably the Tjlo bits in word 2
+ data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+ data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+ data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+ data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+ data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+ data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+ data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+ data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+ data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+ data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+ data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+ data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+ data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+ data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+ data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+ data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+ data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+ data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+ data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+ data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+ data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // row 32: 2^0 = 1.0 exactly
+ data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+ data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+ data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+ data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+ data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+ data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+ data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+ data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+ data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+ data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+ data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+ data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+ data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+ data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+ data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+ data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+ data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+ data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+ data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+ data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+ data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+ data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+ data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+ data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+ data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+ data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+ data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // row 64: 2^(32/64) = sqrt(2)
+ASM_SIZE_DIRECTIVE(double_sinh_j_table)
+
+.align 32
+.global sinhf#
+
+.section .text
+.proc sinhf#
+.align 32
+
+sinhf:
+#ifdef _LIBC
+.global __ieee754_sinhf
+.type __ieee754_sinhf,@function
+__ieee754_sinhf:
+#endif
+
+// X infinity or NAN?
+// Take invalid fault if enabled
+
+
+{ .mfi
+ alloc r32 = ar.pfs,0,12,4,0
+(p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
+ mov sinh_GR_all_ones = -1
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p6) fma.s.s0 f8 = f8,f1,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+// Put 0.25 in f9; p6 true if x < 0.25
+// Make constant that will generate inexact when squared
+{ .mlx
+ setf.sig sinh_FR_all_ones = sinh_GR_all_ones
+(p0) movl r32 = 0x000000000000fffd ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+(p0) fclass.m.unc p7,p0 = f8, 0x07 //@zero
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s sinh_FR_X = f0,f8
+(p7) br.ret.spnt b0 ;;
+}
+
+// Identify denormal operands.
+{ .mfi
+ nop.m 999
+ fclass.m.unc p10,p0 = f8, 0x09 // + denorm
+ nop.i 999
+};;
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x0a // - denorm
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s sinh_FR_SGNX = f8,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.sptk L(SINH_BY_TBL) ;;
+}
+
+
+L(SINH_BY_POLY):
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Set tiny_SAFE (p7) to 1(0) if answer is not tiny
+// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
+// commented out.
+//(p0) movl r32 = 0x000000000000fc01
+//(p0) setf.exp f10 = r32
+//(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
+// Here is essentially the algorithm for SINH_BY_POLY. Care is taken with the order
+// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
+// Note that ax = |x|
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
+// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
+// = sign * (ax + ax*p_odd + (ax*p_even))
+// = sign * (ax + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Get the values of P_x from the table
+{ .mfb
+(p0) addl r34 = @ltoff(double_sinh_p_table), gp
+(p10) fma.s.s0 f8 = f8,f8,f8
+(p10) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ ld8 r34 = [r34]
+(p11) fnma.s.s0 f8 = f8,f8,f8
+(p11) br.ret.spnt b0
+}
+;;
+
+// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+ nop.m 999
+(p0) ldfe sinh_FR_P1 = [r34],16
+(p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_P2 = [r34],16 ;;
+(p0) ldfe sinh_FR_P3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_P4 = [r34],16 ;;
+(p0) ldfe sinh_FR_P5 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_P6 = [r34],16
+(p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
+ nop.i 999 ;;
+}
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999
+}
+
+// Calculate f8 = sign * (Y_hi + Y_lo)
+// Go to return
+{ .mfb
+ nop.m 999
+(p0) fma.s.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(SINH_BY_TBL):
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now.
+// Double-extended:
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010006 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(SINH_HUGE) ;;
+}
+
+// r32 = 1
+// r34 = N-1
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// sinh(x) = sinh(B+R)
+// = sinh(B)cosh(R) + cosh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j*log2/64)
+// sinh(B) = 1/2(e^B -e^-B)
+// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64))
+// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64))
+// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64))
+// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where p_odd uses the A coefficients and p_even uses the B coefficients
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// sinh(B) = S_hi + S_lo
+// cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9 = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0) adds r32 = 0x1,r0
+(p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1) = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+ nop.m 999
+(p0) movl r38 = 0x000000000000fffe ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
+(p0) ldfe sinh_FR_log2by64_hi = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mbb
+(p0) ldfe sinh_FR_log2by64_lo = [r34],16
+ nop.b 999
+ nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9 = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_sinh_ab_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// f38 = M = round-to-integer(ax*Inv_log2by64), where ax = |x|
+// sinh_FR_M = M (fcvt.fx rounds using the s1 rounding mode; it does not truncate)
+// Put the significand of M in r35
+// and the floating point representation of M in sinh_FR_M
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_A1 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r35 = sinh_FR_M_temp
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) and r36 = 0x3f, r35 ;;
+}
+
+// Calculate R
+// f13 = f44 - f12*f10 = ax - M*log2by64_hi
+// f14 = f13 - f8*f11 = R = (ax - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_A2 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
+ nop.i 999
+}
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0) ldfe sinh_FR_A3 = [r34],16 ;;
+(p0) ldfe sinh_FR_B1 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_B2 = [r34],16 ;;
+(p0) ldfe sinh_FR_B3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl r34 = r36, 0x2 ;;
+(p0) sxt1 r37 = r34 ;;
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
+(p0) shr r36 = r37, 0x2 ;;
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0) sub r34 = r35, r36
+ nop.i 999 ;;
+(p0) shr r35 = r34, 0x6 ;;
+}
+
+{ .mii
+(p0) sub r40 = r38, r35
+(p0) adds r37 = 0x1, r35
+(p0) add r39 = r38, r35 ;;
+}
+
+// Get the address of the J table, add the offset,
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0) sub r34 = r35, r32
+(p0) addl r37 = @ltoff(double_sinh_j_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r37 = [r37]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
+ nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p7 true if |x| >= 32
+// Go to EXP if |x| >= 32
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010004 ;;
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34 = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
+ nop.i 999
+}
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp sinh_FR_N_temp1 = r39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
+ nop.i 999
+}
+
+// sinh_GR_mj contains the table offset for -j
+// sinh_GR_j contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0) setf.exp sinh_FR_N_temp2 = r40
+(p0) movl r40 = 0x0000000000000020 ;;
+}
+
+{ .mfi
+(p0) sub sinh_GR_mJ = r40, r36
+(p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1
+(p0) adds sinh_GR_J = 0x20, r36 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;;
+(p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
+(p0) shl sinh_GR_J = sinh_GR_J, 5 ;;
+}
+
+{ .mfi
+(p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
+(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
+(p0) add sinh_AD_J = r37, sinh_GR_J ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
+(p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1
+(p7) br.cond.spnt L(SINH_BY_EXP) ;;
+}
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate S_hi and S_lo
+// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
+ nop.i 999
+}
+
+// Calculate C_hi
+// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+// sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1 )
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
+ nop.i 999
+}
+
+// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
+// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
+ nop.i 999 ;;
+}
+
+/////////// BUG FIX fma to fms -TK
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
+ nop.i 999 ;;
+}
+
+// Y_hi = S_hi
+// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
+// sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+// sinh_FR_SINH = Y_hi + Y_lo
+// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(SINH_BY_EXP):
+
+// When p7 is true, we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
+// Scale = sign * 2^(N-1)
+// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
+// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
+ nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow if N-1 > 0x3fe = 1022
+// There is a danger of single overflow if N-1 > 0x7e = 126
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000000007e ;;
+}
+
+{ .mfi
+(p0) cmp.gt.unc p0,p7 = r34, r32
+(p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
+ nop.i 999 ;;
+}
+
+// f8 = answer = scale * (Y_hi + Y_lo)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
+ nop.i 999 ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p7) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = f44,f44
+(p7) br.ret.sptk b0 ;;
+}
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
+ nop.i 999 ;;
+}
+
+// 1007F => 1007F -FFFF = 80(true)
+// 80 + 7F = FF, which is 1 more than the exponent of the largest
+// single (FE). So 0 1007F 8000000000000000 is one ulp more than
+// largest single in register bias
+// Now set p8 if the answer with WRE is greater than or equal this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000001007F ;;
+}
+
+{ .mmf
+ nop.m 999
+(p0) setf.exp f41 = r32
+(p0) fsetc.s2 0x7F,0x40 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f42 = f41, f41
+ nop.i 999 ;;
+}
+
+// The error tag for overflow is 128
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p8) mov r47 = 128 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fcmp.le.unc.s1 p9, p0 = f43, f42
+(p8) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov r47 = 128
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f44,f44
+(p0) br.ret.sptk b0 ;;
+}
+
+L(SINH_HUGE):
+
+// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000015dbf ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s0 f44 = sinh_FR_signed_hi_lo, f9, f0
+(p0) mov r47 = 128
+}
+.endp sinhf
+ASM_SIZE_DIRECTIVE(sinhf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_sinhf)
+#endif
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
+L(SINH_ERROR_SUPPORT):
+.prologue
+
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfs [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfs [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+// (4)
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S
new file mode 100644
index 0000000..b697c48
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sinhl.S
@@ -0,0 +1,1311 @@
+.file "sinhl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 10/12/00 Update to set denormal operand and underflow flags
+// 1/22/01 Fixed to set inexact flag for small args. Fixed incorrect
+// call to __libm_error_support for 710.476 < x < 11357.2166.
+//
+// API
+//==============================================================
+// long double = sinhl(long double)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers:
+// r32 -> r47
+// predicate registers used:
+// p6 p7 p8 p9 p10 p11
+// floating-point registers used:
+// f9 -> f15; f32 -> f45;
+// f8 has input, then output
+//
+// Overview of operation
+//==============================================================
+// There are four paths
+// 1. |x| < 0.25 SINH_BY_POLY
+// 2. |x| < 32 SINH_BY_TBL
+// 3. |x| < 2^14 SINH_BY_EXP
+// 4. |x| >= 2^14 SINH_HUGE
+//
+// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
+// >= 1.0110001.... x 2^13
+// >= 11357.2166
+//
+// But for double we get infinity for x >= 408633ce8fb9f87e
+// >= 1.0110...x 2^9
+// >= +7.10476e+002
+//
+// And for single we get infinity for x >= 42b3a496
+// >= 1.0110... 2^6
+// >= 89.8215
+//
+// SAFE: If there is danger of overflow set SAFE to 0
+// NOT implemented: if there is danger of underflow, set SAFE to 0
+// SAFE for all paths listed below
+//
+// 1. SINH_BY_POLY
+// ===============
+// If |x| is less than the tiny threshold, then clear SAFE
+// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
+// register-biased, this is fc01
+// For single, the tiny threshold is -126 = -7e => -7e + ffff = ff81
+// If |x| < tiny threshold, set SAFE = 0
+//
+// 2. SINH_BY_TBL
+// =============
+// SAFE: SAFE is always 1 for TBL;
+//
+// 3. SINH_BY_EXP
+// ==============
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// r34 has N-1; 16382 is in register biased form, 0x13ffd
+// There is danger of double overflow if N-1 > 0x3fe
+// in register biased form, 0x103fd
+// Analogously, there is danger of single overflow if N-1 > 0x7e
+// in register biased form, 0x1007d
+// SAFE: If there is danger of overflow set SAFE to 0
+//
+// 4. SINH_HUGE
+// ============
+// SAFE: SAFE is always 0 for HUGE
+//
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+sinh_FR_X = f44
+sinh_FR_X2 = f9
+sinh_FR_X4 = f10
+sinh_FR_SGNX = f40
+sinh_FR_all_ones = f45
+sinh_FR_tmp = f42
+
+sinh_FR_Inv_log2by64 = f9
+sinh_FR_log2by64_lo = f11
+sinh_FR_log2by64_hi = f10
+
+sinh_FR_A1 = f9
+sinh_FR_A2 = f10
+sinh_FR_A3 = f11
+
+sinh_FR_Rcub = f12
+sinh_FR_M_temp = f13
+sinh_FR_R_temp = f13
+sinh_FR_Rsq = f13
+sinh_FR_R = f14
+
+sinh_FR_M = f38
+
+sinh_FR_B1 = f15
+sinh_FR_B2 = f32
+sinh_FR_B3 = f33
+
+sinh_FR_peven_temp1 = f34
+sinh_FR_peven_temp2 = f35
+sinh_FR_peven = f36
+
+sinh_FR_podd_temp1 = f34
+sinh_FR_podd_temp2 = f35
+sinh_FR_podd = f37
+
+sinh_FR_poly_podd_temp1 = f11
+sinh_FR_poly_podd_temp2 = f13
+sinh_FR_poly_peven_temp1 = f11
+sinh_FR_poly_peven_temp2 = f13
+
+sinh_FR_J_temp = f9
+sinh_FR_J = f10
+
+sinh_FR_Mmj = f39
+
+sinh_FR_N_temp1 = f11
+sinh_FR_N_temp2 = f12
+sinh_FR_N = f13
+
+sinh_FR_spos = f14
+sinh_FR_sneg = f15
+
+sinh_FR_Tjhi = f32
+sinh_FR_Tjlo = f33
+sinh_FR_Tmjhi = f34
+sinh_FR_Tmjlo = f35
+
+sinh_GR_mJ = r35
+sinh_GR_J = r36
+
+sinh_AD_mJ = r38
+sinh_AD_J = r39
+sinh_GR_all_ones = r40
+
+sinh_FR_S_hi = f9
+sinh_FR_S_hi_temp = f10
+sinh_FR_S_lo_temp1 = f11
+sinh_FR_S_lo_temp2 = f12
+sinh_FR_S_lo_temp3 = f13
+
+sinh_FR_S_lo = f38
+sinh_FR_C_hi = f39
+
+sinh_FR_C_hi_temp1 = f10
+sinh_FR_Y_hi = f11
+sinh_FR_Y_lo_temp = f12
+sinh_FR_Y_lo = f13
+sinh_FR_SINH = f9
+
+sinh_FR_P1 = f14
+sinh_FR_P2 = f15
+sinh_FR_P3 = f32
+sinh_FR_P4 = f33
+sinh_FR_P5 = f34
+sinh_FR_P6 = f35
+
+sinh_FR_TINY_THRESH = f9
+
+sinh_FR_SINH_temp = f10
+sinh_FR_SCALE = f11
+
+sinh_FR_signed_hi_lo = f10
+
+
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+
+GR_Parameter_X = r44
+GR_Parameter_Y = r45
+GR_Parameter_RESULT = r46
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+double_sinh_arg_reduction:
+ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
+ data8 0xB8AA3B295C17F0BC, 0x00004005
+ data8 0xB17217F7D1000000, 0x00003FF8
+ data8 0xCF79ABC9E3B39804, 0x00003FD0
+ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
+
+double_sinh_p_table:
+ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
+ data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
+ data8 0x8888888888888412, 0x00003FF8
+ data8 0xD00D00D00D4D39F2, 0x00003FF2
+ data8 0xB8EF1D28926D8891, 0x00003FEC
+ data8 0xD732377688025BE9, 0x00003FE5
+ data8 0xB08AF9AE78C1239F, 0x00003FDE
+ASM_SIZE_DIRECTIVE(double_sinh_p_table)
+
+double_sinh_ab_table:
+ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
+ data8 0x88888888884ECDD5, 0x00003FF8
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2
+ data8 0x8000000000000002, 0x00003FFE
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA
+ data8 0xB60B6CC96BDB144D, 0x00003FF5
+ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
+
+double_sinh_j_table:
+ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
+ data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
+ data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+ data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+ data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+ data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+ data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+ data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+ data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+ data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+ data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+ data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+ data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+ data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+ data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+ data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+ data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+ data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+ data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+ data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+ data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+ data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+ data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
+ data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+ data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+ data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+ data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+ data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+ data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+ data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+ data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+ data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+ data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+ data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+ data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+ data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+ data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+ data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+ data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+ data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+ data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+ data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+ data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+ data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+ data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+ data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+ data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+ data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+ data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+ data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
+ASM_SIZE_DIRECTIVE(double_sinh_j_table)
+
+.align 32
+.global sinhl#
+
+.section .text
+.proc sinhl#
+.align 32
+
+sinhl:
+#ifdef _LIBC
+.global __ieee754_sinhl
+.type __ieee754_sinhl,@function
+__ieee754_sinhl:
+#endif
+
+// X infinity or NAN?
+// Take invalid fault if enabled
+
+
+{ .mfi
+ alloc r32 = ar.pfs,0,12,4,0
+(p0) fclass.m.unc p6,p0 = f8, 0xe3 //@qnan | @snan | @inf
+ mov sinh_GR_all_ones = -1
+}
+;;
+
+
+{ .mfb
+ nop.m 999
+(p6) fma.s0 f8 = f8,f1,f8
+(p6) br.ret.spnt b0 ;;
+}
+
+// Put 0.25 in f9; p7 will be true if |x| >= 0.25 (take the TBL path)
+// Make constant that will generate inexact when squared
+{ .mlx
+ setf.sig sinh_FR_all_ones = sinh_GR_all_ones
+(p0) movl r32 = 0x000000000000fffd ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32
+(p0) fclass.m.unc p7,p0 = f8, 0x07 //@zero
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s sinh_FR_X = f0,f8
+(p7) br.ret.spnt b0 ;;
+}
+
+// Identify denormal operands.
+{ .mfi
+ nop.m 999
+ fclass.m.unc p10,p0 = f8, 0x09 // + denorm
+ nop.i 999
+};;
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x0a // - denorm
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s sinh_FR_SGNX = f8,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.sptk L(SINH_BY_TBL) ;;
+}
+
+
+L(SINH_BY_POLY): // reached by fall-through when |x| < 0.25
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Set tiny_SAFE (p7) to 1(0) if answer is not tiny
+// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
+// commented out.
+//(p0) movl r32 = 0x000000000000fc01
+//(p0) setf.exp f10 = r32
+//(p0) fcmp.lt.unc.s1 p6,p7 = f8,f10
+// Here is essentially the algorithm for SINH_BY_POLY. Care is taken over the order
+// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
+// Note that ax = |x|
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
+// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
+// = sign * (ax + ax*p_odd + (ax*p_even))
+// = sign * (ax + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Get the values of P_x from the table
+{ .mfb
+(p0) addl r34 = @ltoff(double_sinh_p_table), gp // r34 = linkage-table slot for the P table
+(p10) fma.s0 f8 = f8,f8,f8 // + denormal: result is x + x*x
+(p10) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ ld8 r34 = [r34] // r34 = address of double_sinh_p_table
+(p11) fnma.s0 f8 = f8,f8,f8 // - denormal: result is x - x*x
+(p11) br.ret.spnt b0
+}
+;;
+
+// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+ nop.m 999
+(p0) ldfe sinh_FR_P1 = [r34],16
+(p0) fma.s1 sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_P2 = [r34],16 ;;
+(p0) ldfe sinh_FR_P3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_P4 = [r34],16 ;;
+(p0) ldfe sinh_FR_P5 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_P6 = [r34],16
+(p0) fma.s1 sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_podd = p_odd (uses P1, P3, P5) and sinh_FR_peven = p_even (uses P2, P4, P6)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3 // P3 + ax^4*P5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1 // P1 + ax^4*(P3 + ax^4*P5)
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4 // P4 + ax^4*P6
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0 // p_odd = ax^2 * (...)
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 // P2 + ax^4*(P4 + ax^4*P6)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0 // p_even = ax^4 * (...)
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_X, sinh_FR_peven, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_X, sinh_FR_podd, sinh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH = sinh_FR_X, f1, sinh_FR_Y_lo
+ nop.i 999 ;;
+}
+// Dummy multiply to generate inexact (done in s0, the user status field)
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999
+}
+
+// Calculate f8 = sign * (Y_hi + Y_lo)
+// Go to return
+{ .mfb
+ nop.m 999
+(p0) fma.s0 f8 = sinh_FR_SGNX,sinh_FR_SINH,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(SINH_BY_TBL): // entered from the sinhl prologue with sinh_FR_X = |x| and sinh_FR_SGNX set
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now.
+// Double-extended:
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7, 10006 (register-biased) is e = 7 (true)
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x000000000001000d ;; // double-extended threshold exponent (this is sinhl)
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32 // f9 = 2^14
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p6,p7 = sinh_FR_X,f9 // p6 = (|x| >= 2^14)
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(SINH_HUGE) ;;
+}
+
+// r32 = 1
+// r34 = N-1
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// sinh(x) = sinh(B+R)
+// = sinh(B)cosh(R) + cosh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j/64)
+// sinh(B) = 1/2(e^B -e^-B)
+// = 1/2(2^N * 2^(j/64) - 2^-N * 2^(-j/64))
+// sinh(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64))
+// cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64))
+// 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where p_odd uses the A coefficients and p_even uses the B coefficients
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// sinh(B) = S_hi + S_lo
+// cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// sinh_FR_Inv_log2by64 = Inv_log2by64
+// sinh_FR_log2by64_hi = log2by64_hi
+// sinh_FR_log2by64_lo = log2by64_lo
+
+{ .mmi
+(p0) adds r32 = 0x1,r0 // r32 = 1
+(p0) addl r34 = @ltoff(double_sinh_arg_reduction), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34] // r34 = address of double_sinh_arg_reduction
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1) = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+ nop.m 999
+(p0) movl r38 = 0x000000000000fffe ;; // r38 = 0xffff - 1, base for r39/r40
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_Inv_log2by64 = [r34],16 ;;
+(p0) ldfe sinh_FR_log2by64_hi = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mbb
+(p0) ldfe sinh_FR_log2by64_lo = [r34],16
+ nop.b 999
+ nop.b 999 ;;
+}
+
+// Get the A coefficients
+// sinh_FR_A1 = A_1
+// sinh_FR_A2 = A_2
+// sinh_FR_A3 = A_3
+
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(double_sinh_ab_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34] // r34 = address of double_sinh_ab_table
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// sinh_FR_M_temp = M = integer(ax*Inv_log2by64), converted by fcvt.fx
+// (NOTE(review): plain fcvt.fx, not fcvt.fx.trunc, so this is not a truncation - confirm)
+// Put the significand of M in r35
+// and the floating point representation of M in sinh_FR_M
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_M = sinh_FR_X, sinh_FR_Inv_log2by64, f0
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_A1 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.fx.s1 sinh_FR_M_temp = sinh_FR_M
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnorm.s1 sinh_FR_M = sinh_FR_M_temp // integer M back to normalized floating point
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r35 = sinh_FR_M_temp // r35 = M as an integer
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) and r36 = 0x3f, r35 ;; // six lsb of M (not yet sign-extended)
+}
+
+// Calculate R
+// sinh_FR_R_temp = ax - M*log2by64_hi
+// sinh_FR_R = R = (ax - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X
+ nop.i 999
+}
+
+{ .mfi
+(p0) ldfe sinh_FR_A2 = [r34],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_R = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp
+ nop.i 999
+}
+
+// Get the B coefficients
+// sinh_FR_B1 = B_1
+// sinh_FR_B2 = B_2
+// sinh_FR_B3 = B_3
+
+{ .mmi
+(p0) ldfe sinh_FR_A3 = [r34],16 ;;
+(p0) ldfe sinh_FR_B1 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_B2 = [r34],16 ;;
+(p0) ldfe sinh_FR_B3 = [r34],16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) shl r34 = r36, 0x2 ;; // move bit 5 of the 6-bit field up to bit 7
+(p0) sxt1 r37 = r34 ;; // sign-extend from bit 7; the shr in STEP 2 shifts it back down
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// sinh_FR_Rcub = R*R*R
+// sinh_FR_Rsq = R*R
+// sinh_FR_R = R <== from above
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Rsq = sinh_FR_R, sinh_FR_R, f0
+(p0) shr r36 = r37, 0x2 ;; // r36 = j: signed shift undoes the earlier shl by 2
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0) sub r34 = r35, r36
+ nop.i 999 ;;
+(p0) shr r35 = r34, 0x6 ;;
+}
+
+{ .mii
+(p0) sub r40 = r38, r35 // r40 = 0xffff - N - 1 (biased exponent of 2^(-N-1))
+(p0) adds r37 = 0x1, r35 // r37 = N+1; overwritten below by the J-table address before any use
+(p0) add r39 = r38, r35 ;; // r39 = 0xffff + N - 1 (biased exponent of 2^(N-1))
+}
+
+// Get the address of the J table, add the offset,
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// sinh_FR_Tjhi = T(j)_hi
+// sinh_FR_Tjlo = T(j)_lo
+// sinh_FR_Tmjhi = T(-j)_hi
+// sinh_FR_Tmjlo = T(-j)_lo
+
+{ .mmi
+(p0) sub r34 = r35, r32 // r34 = N-1 (r32 holds 1); tested later in SINH_BY_EXP
+(p0) addl r37 = @ltoff(double_sinh_j_table), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r37 = [r37] // r37 = address of double_sinh_j_table
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0
+ nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9 (biased exponent 0x10004 = 2^5); p7 is set below when |x| is not < 32
+// Go to EXP if |x| >= 32
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000010004 ;;
+}
+
+// Calculate p_even
+// sinh_FR_peven_temp1 = B_2 + Rsq *B_3
+// sinh_FR_peven_temp2 = B_1 + Rsq * (B_2 + Rsq *B_3)
+// sinh_FR_peven = p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3, sinh_FR_B2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1
+ nop.i 999
+}
+
+// Calculate p_odd
+// sinh_FR_podd_temp1 = A_2 + Rsq *A_3
+// sinh_FR_podd_temp2 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// sinh_FR_podd = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd_temp1 = sinh_FR_Rsq, sinh_FR_A3, sinh_FR_A2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp sinh_FR_N_temp1 = r39 // exponent field 0xffff + N - 1
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_peven = sinh_FR_Rsq, sinh_FR_peven_temp2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd_temp2 = sinh_FR_Rsq, sinh_FR_podd_temp1, sinh_FR_A1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32 // f9 = 32.0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_podd = sinh_FR_podd_temp2, sinh_FR_Rcub, sinh_FR_R
+ nop.i 999
+}
+
+// sinh_GR_mJ contains the table offset for -j: (32 - j) * 32 bytes
+// sinh_GR_J contains the table offset for +j: (32 + j) * 32 bytes
+// NOTE(review): the original comment said "p6 is true when j <= 0", but nothing here sets p6 from j
+
+{ .mlx
+(p0) setf.exp sinh_FR_N_temp2 = r40 // exponent field 0xffff - N - 1 (r40 is reloaded next)
+(p0) movl r40 = 0x0000000000000020 ;; // r40 = 32
+}
+
+{ .mfi
+(p0) sub sinh_GR_mJ = r40, r36 // 32 - j
+(p0) fmerge.se sinh_FR_spos = sinh_FR_N_temp1, f1 // 2^(N-1): exponent of N_temp1, significand of 1.0
+(p0) adds sinh_GR_J = 0x20, r36 ;; // 32 + j
+}
+
+{ .mii
+ nop.m 999
+(p0) shl sinh_GR_mJ = sinh_GR_mJ, 5 ;; // scale by the 32-byte table entry size
+(p0) add sinh_AD_mJ = r37, sinh_GR_mJ ;;
+}
+
+{ .mmi
+ nop.m 999
+(p0) ldfe sinh_FR_Tmjhi = [sinh_AD_mJ],16
+(p0) shl sinh_GR_J = sinh_GR_J, 5 ;; // scale by the 32-byte table entry size
+}
+
+{ .mfi
+(p0) ldfs sinh_FR_Tmjlo = [sinh_AD_mJ],16
+(p0) fcmp.lt.unc.s1 p0,p7 = sinh_FR_X,f9 // p7 = !(|x| < 32)
+(p0) add sinh_AD_J = r37, sinh_GR_J ;;
+}
+
+{ .mmi
+(p0) ldfe sinh_FR_Tjhi = [sinh_AD_J],16 ;;
+(p0) ldfs sinh_FR_Tjlo = [sinh_AD_J],16
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.se sinh_FR_sneg = sinh_FR_N_temp2, f1 // 2^(-N-1)
+(p7) br.cond.spnt L(SINH_BY_EXP) ;;
+}
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// ******************************************************
+// If NOT branch to EXP (0.25 <= |x| < 32): finish with the table values
+// ******************************************************
+// Calculate S_hi and S_lo
+// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi_temp
+ nop.i 999
+}
+
+// Calculate C_hi
+// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0
+ nop.i 999 ;;
+}
+
+// sinh_FR_S_lo_temp1 = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1 )
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_lo_temp1 = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_C_hi = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1
+ nop.i 999
+}
+
+// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo (the register is reused here)
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
+// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_lo_temp1 = sinh_FR_sneg, sinh_FR_Tmjlo, f0
+ nop.i 999 ;;
+}
+
+/////////// BUG FIX fma to fms -TK
+{ .mfi
+ nop.m 999
+(p0) fms.s1 sinh_FR_S_lo_temp3 = sinh_FR_spos, sinh_FR_Tjlo, sinh_FR_S_lo_temp1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_S_lo = sinh_FR_S_lo_temp3, f1, sinh_FR_S_lo_temp2
+ nop.i 999 ;;
+}
+
+// Y_hi = S_hi
+// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
+// sinh_FR_Y_lo = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp
+ nop.i 999 ;;
+}
+
+// sinh_FR_SINH = Y_hi + Y_lo
+// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+
+// Dummy multiply to generate inexact (done in s0, the user status field)
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH = sinh_FR_S_hi, f1, sinh_FR_Y_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s0 f8 = sinh_FR_SGNX, sinh_FR_SINH,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(SINH_BY_EXP): // reached from STEP 3 when |x| >= 32 (and below the HUGE threshold)
+
+// When p7 is true, we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag (recomputed by the cmp.gt below from N-1)
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
+// Scale = sign * 2^(N-1)
+// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
+// sinh_FR_Y_lo = sinh_FR_Tjhi * (sinh_FR_Y_lo_temp )
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo_temp = sinh_FR_peven, f1, sinh_FR_podd
+ nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow if N-1 > 0x3fe = 1022
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000003ffe ;;
+}
+
+{ .mfi
+(p0) cmp.gt.unc p0,p7 = r34, r32 // p7 = !(N-1 > 0x3ffe), i.e. over_SAFE
+(p0) fmerge.s sinh_FR_SCALE = sinh_FR_SGNX, sinh_FR_spos // sign of x on 2^(N-1)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_Y_lo = sinh_FR_Tjhi, sinh_FR_Y_lo_temp, sinh_FR_Tjlo
+ nop.i 999 ;;
+}
+
+// sinh_FR_SINH_temp = Y_hi + Y_lo; answer = scale * (Y_hi + Y_lo), first computed into f44
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_SINH_temp = sinh_FR_Y_lo, f1, sinh_FR_Tjhi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s0 f44 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
+ nop.i 999 ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 999
+(p7) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = f44,f44 // copy the result into the return register
+(p7) br.ret.sptk b0 ;;
+}
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42 // s2 controls = (s0 controls & 0x7F) | 0x42, the WRE + TD setting described above
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s2 f43 = sinh_FR_SCALE, sinh_FR_SINH_temp, f0
+ nop.i 999 ;;
+}
+
+// 13FFF => 13FFF -FFFF = 4000(true)
+// 4000 + 3FFF = 7FFF, which is 1 more than the exponent of the largest
+// long double (7FFE). So 0 13FFF 8000000000000000 is one ulp more than
+// largest long double in register bias
+// Now set p8 if the answer with WRE is greater than or equal this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x00000000013FFF ;;
+}
+
+{ .mmf
+ nop.m 999
+(p0) setf.exp f41 = r32
+(p0) fsetc.s2 0x7F,0x40 ;; // s2 controls = (s0 controls & 0x7F) | 0x40: the 0x02 bit set above is dropped again
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p8, p0 = f43, f41 // p8 = positive overflow
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f42 = f41, f41 // f42 = -f41
+ nop.i 999 ;;
+}
+
+// The error tag for overflow is 126
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p8) mov r47 = 126 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fcmp.le.unc.s1 p9, p0 = f43, f42 // p9 = negative overflow
+(p8) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) mov r47 = 126
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+// No overflow after all: dummy multiply to generate inexact, then return f44
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmerge.s f8 = f44,f44
+(p0) br.ret.sptk b0 ;;
+}
+
+L(SINH_HUGE): // reached from SINH_BY_TBL when |x| >= 2^14
+
+// for SINH_HUGE, put 24000 in exponent (0x15dbf = 0xffff + 24000); take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+{ .mlx
+ nop.m 999
+(p0) movl r32 = 0x0000000000015dbf ;;
+}
+
+{ .mfi
+(p0) setf.exp f9 = r32 // f9 = 2^24000
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1 // sign * 2^24000 + 1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s0 f44 = sinh_FR_signed_hi_lo, f9, f0 // multiplied by 2^24000 again, in the user status field s0
+(p0) mov r47 = 126 // overflow error tag; no branch follows, so execution falls through into the error-support code placed next
+}
+.endp sinhl
+ASM_SIZE_DIRECTIVE(sinhl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_sinhl)
+#endif
+
+// Stack operations when calling error support.
+//          (1)               (2)                   (3) (call)                (4)
+//     sp -> +          psp -> +                 psp -> +                sp -> +
+//           |                 |                        |                      |
+//           |                 | <- GR_Y           R3 ->| <- GR_RESULT         | -> f8
+//           |                 |                        |                      |
+//           | <-GR_Y      Y2->|                   Y2 ->| <- GR_Y              |
+//           |                 |                        |                      |
+//           |                 | <- GR_X           X1 ->|                      |
+//           |                 |                        |                      |
+//  sp-64 -> +           sp -> +                  sp -> +                      +
+//    save ar.pfs       save b0                                          restore gp
+//    save gp                                                            restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
+L(SINH_ERROR_SUPPORT): // entered with the error tag in r47, the input x in f8 and the computed result in f44
+.prologue
+
+// (1) Save ar.pfs and gp, and open a 64-byte stack frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Store a zero second parameter and save the return address
+{ .mmi
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store the input and the result, then call the error handler
+{ .mib
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = f44 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // recompute the result slot address (sp+48) after the call
+};;
+
+// (4) Reload the (possibly updated) result and unwind
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S
new file mode 100644
index 0000000..ee6eb65
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrt.S
@@ -0,0 +1,347 @@
+.file "sqrt.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// ********************************************************************
+// History
+// ********************************************************************
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// ********************************************************************
+//
+// Function: Combined sqrt(x), where
+//
+// sqrt(x) = the square root of x, for double precision x values
+//
+// ********************************************************************
+//
+// Accuracy: Correctly Rounded
+//
+// ********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f6-f7, f9-f15 (f15 keeps a copy of the input for error handling)
+//
+// General Purpose Registers:
+// r2-r3 (Scratch, used to build constants), r32-r36 (Locals)
+// r37-r40 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6, p7, p8
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// sqrt(QNaN) = QNaN
+// sqrt(SNaN) = QNaN
+// sqrt(+/-0) = +/-0
+// sqrt(negative) = QNaN and error handling is called
+//
+// *********************************************************************
+//
+// Implementation:
+//
+// Modified Newton-Raphson Algorithm
+//
+// *********************************************************************
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r33 // local registers used by the error path to save ar.pfs, b0 and gp
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+
+GR_Parameter_X = r37 // output registers passed to __libm_error_support
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+
+
+.section .text
+.proc sqrt#
+.global sqrt#
+.align 64
+
+sqrt:
+#ifdef _LIBC
+.global __sqrt
+.type __sqrt,@function
+__sqrt:
+.global __ieee754_sqrt
+.type __ieee754_sqrt,@function
+__ieee754_sqrt:
+#endif
+{ .mfi
+ alloc r32= ar.pfs,0,5,4,0
+ frsqrta.s0 f7,p6=f8
+ nop.i 0
+} { .mlx
+ // BEGIN DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+ nop.m 0
+ // exponent of +1/2 in r2
+ movl r2 = 0x0fffe;;
+} { .mmi
+ // +1/2 in f9
+ setf.exp f9 = r2
+ nop.m 0
+ nop.i 0
+} { .mlx
+ nop.m 0
+ // 3/2 in r3
+ movl r3=0x3fc00000;;
+} { .mfi
+ setf.s f10=r3
+ // Step (1)
+ // y0 = 1/sqrt(a) in f7 (from the frsqrta above); here p7 = x matches class 0x3A (negative, non-zero), p8 = otherwise
+ fclass.m.unc p7,p8 = f8,0x3A
+ nop.i 0;;
+} { .mlx
+ nop.m 0
+ // 5/2 in r2
+ movl r2 = 0x40200000
+} { .mlx
+ nop.m 0
+ // 63/8 in r3
+ movl r3 = 0x40fc0000;;
+} { .mfi
+ setf.s f11=r2
+ // Step (2)
+ // h = +1/2 * y0 in f6
+ (p6) fma.s1 f6=f9,f7,f0
+ nop.i 0
+} { .mfi
+ setf.s f12=r3
+ // Step (3)
+ // g = a * y0 in f7
+ (p6) fma.s1 f7=f8,f7,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ mov f15 = f8 // keep the original argument; the error path stores f15 as Parameter 1
+ nop.i 0;;
+} { .mlx
+ nop.m 0
+ // 231/16 in r2
+ movl r2 = 0x41670000;;
+} { .mfi
+ setf.s f13=r2
+ // Step (4)
+ // e = 1/2 - g * h in f9
+ (p6) fnma.s1 f9=f7,f6,f9
+ nop.i 0
+} { .mlx
+ nop.m 0
+ // 35/8 in r3
+ movl r3 = 0x408c0000;;
+} { .mfi
+ setf.s f14=r3
+ // Step (5)
+ // S = 3/2 + 5/2 * e in f10
+ (p6) fma.s1 f10=f11,f9,f10
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // e2 = e * e in f11
+ (p6) fma.s1 f11=f9,f9,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (7)
+ // t = 63/8 + 231/16 * e in f12
+ (p6) fma.s1 f12=f13,f9,f12
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (8)
+ // S1 = e + e2 * S in f10
+ (p6) fma.s1 f10=f11,f10,f9
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (9)
+ // e4 = e2 * e2 in f11
+ (p6) fma.s1 f11=f11,f11,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (10)
+ // t1 = 35/8 + e * t in f9
+ (p6) fma.s1 f9=f9,f12,f14
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (11)
+ // G = g + S1 * g in f12
+ (p6) fma.s1 f12=f10,f7,f7
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (12)
+ // E = g * e4 in f7
+ (p6) fma.s1 f7=f7,f11,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (13)
+ // u = S1 + e4 * t1 in f10
+ (p6) fma.s1 f10=f11,f9,f10
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (14)
+ // g1 = G + t1 * E in f7
+ (p6) fma.d.s1 f7=f9,f7,f12
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (15)
+ // h1 = h + u * h in f6
+ (p6) fma.s1 f6=f10,f6,f6
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (16)
+ // d = a - g1 * g1 in f9
+ (p6) fnma.s1 f9=f7,f7,f8
+ nop.i 0;;
+} { .mfb
+ nop.m 0
+ // Step (17)
+ // g2 = g1 + d * h1 in f8 (final result, fma.d in the user status field s0)
+ (p6) fma.d.s0 f8=f9,f6,f7
+ (p6) br.ret.sptk b0 ;;
+}
+
+{ .mfb
+ nop.m 0
+ (p0) mov f8 = f7 // p6 was clear: return the value frsqrta left in f7
+ (p8) br.ret.sptk b0 ;;
+}
+{ .mfb
+ (p7) mov r40 = 49 // error tag in r40 (presumably "sqrt of a negative" - confirm in libm_support.h)
+ nop.f 0
+ (p7) br.cond.sptk __libm_error_region ;;
+}
+// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+.endp sqrt#
+ASM_SIZE_DIRECTIVE(sqrt)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__sqrt)
+ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
+#endif
+
+// Stack operations when calling error support.
+//          (1)               (2)                   (3) (call)                (4)
+//     sp -> +          psp -> +                 psp -> +                sp -> +
+//           |                 |                        |                      |
+//           |                 | <- GR_Y           R3 ->| <- GR_RESULT         | -> f8
+//           |                 |                        |                      |
+//           | <-GR_Y      Y2->|                   Y2 ->| <- GR_Y              |
+//           |                 |                        |                      |
+//           |                 | <- GR_X           X1 ->|                      |
+//           |                 |                        |                      |
+//  sp-64 -> +           sp -> +                  sp -> +                      +
+//    save ar.pfs       save b0                                          restore gp
+//    save gp                                                            restore ar.pfs
+
+
+.proc __libm_error_region
+__libm_error_region:
+
+//
+// sqrt branches here only under p7, i.e. when its fclass test (0x3A) matched a
+// negative non-zero x; f8 holds the value frsqrta produced, f15 the original x, r40 the error tag.
+//
+
+.prologue
+// NOTE(review): the next two lines mention over(under)flow, which sqrt never reports here;
+// they look inherited from a shared template. __libm_error_support decides errno and the final result.
+
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfd [GR_Parameter_X] = f15 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // re-derive the result address (overwritten by the call, per the 8/15/00 history note)
+};;
+
+// (4)
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S
new file mode 100644
index 0000000..27d0bcf
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrtf.S
@@ -0,0 +1,266 @@
+.file "sqrtf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+// History:
+//
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: Combined sqrtf(x), where
+//
+// sqrtf(x) = square root of x, for single precision x values
+//
+// ********************************************************************
+//
+// Accuracy: Correctly Rounded
+//
+// ********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f7 -f14
+//
+// General Purpose Registers:
+// r32-r36 (Locals)
+// r37-r40 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6, p7, p8
+//
+// ********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// sqrtf(QNaN) = QNaN
+// sqrtf(SNaN) = QNaN
+// sqrtf(+/-0) = +/-0
+// sqrtf(negative) = QNaN and error handling is called
+//
+// ********************************************************************
+//
+// Implementation:
+//
+// Modified Newton-Raphson Algorithm
+//
+// ********************************************************************
+
+#include "libm_support.h"
+
+GR_SAVE_B0 = r34 // local: holds b0 across the __libm_error_support call
+GR_SAVE_PFS = r33 // local: holds ar.pfs across the call
+GR_SAVE_GP = r35 // local: holds gp across the call
+
+GR_Parameter_X = r37 // address of the stored input x
+GR_Parameter_Y = r38 // address of the stored second operand (FR_Y)
+GR_Parameter_RESULT = r39 // address of the stored result
+GR_Parameter_TAG = r40 // error tag (set to 50 in __libm_error_region)
+
+FR_X = f13 // copy of the input made in sqrtf
+FR_Y = f0 // second operand slot is filled from f0
+FR_RESULT = f8 // result register, stored before and reloaded after the call
+
+
+
+.section .text
+.proc sqrtf#
+.global sqrtf#
+.align 64
+
+sqrtf:
+#ifdef _LIBC
+.global __sqrtf
+.type __sqrtf,@function
+__sqrtf:
+.global __ieee754_sqrtf
+.type __ieee754_sqrtf,@function
+__ieee754_sqrtf:
+#endif
+{ .mlx
+ // BEGIN SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+ alloc r32= ar.pfs,0,5,4,0
+ // exponent of +1/2 in r2
+ movl r2 = 0x0fffe
+} { .mfi
+ // Step (1): y0 = 1/sqrt(a) in f7; p6 gates the refinement steps
+ nop.m 0
+ frsqrta.s0 f7,p6=f8
+ nop.i 0;;
+} { .mfi
+ setf.exp f12 = r2 // +1/2 in f12
+ // p7/p8: x does / does not match fclass mask 0x3A
+ // (presumably the negative non-zero classes -- TODO confirm)
+ fclass.m.unc p7,p8 = f8,0x3A
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Keep x in f13 (FR_X) for the error path
+ mov f13 = f8
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (2)
+ // H0 = 1/2 * y0 in f9
+ (p6) fma.s1 f9=f12,f7,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // S0 = a * y0 in f7
+ (p6) fma.s1 f7=f8,f7,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (4)
+ // d = 1/2 - S0 * H0 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (0'')
+ // 3/2 = 1 + 1/2 in f12
+ (p6) fma.s1 f12=f12,f1,f1
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (5)
+ // e = 1 + 3/2 * d in f12
+ (p6) fma.s1 f12=f12,f10,f1
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // T0 = d * S0 in f11
+ (p6) fma.s1 f11=f10,f7,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (7)
+ // G0 = d * H0 in f10
+ (p6) fma.s1 f10=f10,f9,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (8)
+ // S1 = S0 + e * T0 in f7
+ (p6) fma.s.s1 f7=f12,f11,f7
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (9)
+ // H1 = H0 + e * G0 in f12
+ (p6) fma.s1 f12=f12,f10,f9
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (10)
+ // d1 = a - S1 * S1 in f9
+ (p6) fnma.s1 f9=f7,f7,f8
+ nop.i 0;;;
+} { .mfb
+ nop.m 0
+ // Step (11)
+ // S = S1 + d1 * H1 in f8 (the return register)
+ (p6) fma.s.s0 f8=f9,f12,f7
+ (p6) br.ret.sptk b0 ;;
+// END SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+} { .mfb
+ nop.m 0
+ (p0) mov f8 = f7 // p6 clear: return value is the frsqrta result
+ (p8) br.ret.sptk b0 ;; // taken unless p7 is set
+}
+// The (p8) return above handles the special values that are not negative;
+// their result is the frsqrta(x) value copied from f7 into f8.
+// Execution only reaches this point with p7 set and continues into
+// __libm_error_region below.
+.endp sqrtf
+ASM_SIZE_DIRECTIVE(sqrtf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__sqrtf)
+ASM_SIZE_DIRECTIVE(__ieee754_sqrtf)
+#endif
+
+
+.proc __libm_error_region // Error path, entered by fall-through from sqrtf
+__libm_error_region:
+.prologue
+{ .mii
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
+(p0) mov GR_Parameter_TAG = 50 // Error tag handed to __libm_error_support
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack, then advance to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S
new file mode 100644
index 0000000..4054cf0
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrtl.S
@@ -0,0 +1,281 @@
+.file "sqrtl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// ********************************************************************
+//
+// History:
+// 2/02/00 (hand-optimized)
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// ********************************************************************
+//
+// Function: Combined sqrtl(x), where
+//
+// sqrtl(x) = square root of x, for double-extended precision x values
+//
+// ********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f7, f9, f10, f12, f13, f15
+//
+// General Purpose Registers:
+// r32-r36 (Locals)
+// r37-r40 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6, p7, p8
+//
+// ********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// sqrtl(QNaN) = QNaN
+// sqrtl(SNaN) = QNaN
+// sqrtl(+/-0) = +/-0
+// sqrtl(negative) = QNaN and error handling is called
+//
+// ********************************************************************
+//
+// Implementation:
+//
+// Modified Newton-Raphson Algorithm
+//
+// ********************************************************************
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r33 // local: holds ar.pfs across the __libm_error_support call
+GR_SAVE_B0 = r34 // local: holds b0 across the call
+GR_SAVE_GP = r35 // local: holds gp across the call
+GR_Parameter_X = r37 // address of the stored input x
+GR_Parameter_Y = r38 // address of the stored second operand (FR_Y)
+GR_Parameter_RESULT = r39 // address of the stored result
+GR_Parameter_TAG = r40 // error tag (set to 48 at the end of sqrtl)
+
+FR_X = f15 // copy of the input made in sqrtl
+FR_Y = f0 // second operand slot is filled from f0
+FR_RESULT = f8 // result register, stored before and reloaded after the call
+
+.section .text
+.proc sqrtl#
+.global sqrtl#
+.align 64
+
+sqrtl:
+#ifdef _LIBC
+.global __sqrtl
+.type __sqrtl,@function
+__sqrtl:
+.global __ieee754_sqrtl
+.type __ieee754_sqrtl,@function
+__ieee754_sqrtl:
+#endif
+{ .mlx
+alloc r32= ar.pfs,0,5,4,0
+ // exponent of +1/2 in r2
+ movl r2 = 0x0fffe;;
+} { .mfi
+ // +1/2 in f12
+ setf.exp f12 = r2
+ // Step (1)
+ // y0 = 1/sqrt(a) in f7; p6 gates the refinement steps
+ frsqrta.s0 f7,p6=f8
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (2)
+ // H0 = +1/2 * y0 in f9
+ (p6) fma.s1 f9=f12,f7,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // S0 = a * y0 in f7
+ (p6) fma.s1 f7=f8,f7,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Copy of x in f13 (not read again in this file; FR_X is f15)
+ mov f13=f8
+ nop.i 0
+} { .mfi
+ nop.m 0
+ fclass.m.unc p7,p8 = f8,0x3A // p7/p8: x does / does not match mask 0x3A (presumably negative non-zero -- TODO confirm)
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (4)
+ // d0 = 1/2 - S0 * H0 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+ (p0) mov f15=f8 // Keep x in f15 (FR_X) for the error path
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (5)
+ // H1 = H0 + d0 * H0 in f9
+ (p6) fma.s1 f9=f10,f9,f9
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // S1 = S0 + d0 * S0 in f7
+ (p6) fma.s1 f7=f10,f7,f7
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (7)
+ // d1 = 1/2 - S1 * H1 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (8)
+ // H2 = H1 + d1 * H1 in f9
+ (p6) fma.s1 f9=f10,f9,f9
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (9)
+ // S2 = S1 + d1 * S1 in f7
+ (p6) fma.s1 f7=f10,f7,f7
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (10)
+ // d2 = 1/2 - S2 * H2 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (11)
+ // e2 = a - S2 * S2 in f12 (replaces the 1/2 constant)
+ (p6) fnma.s1 f12=f7,f7,f8
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (12)
+ // S3 = S2 + e2 * H2 in f7
+ (p6) fma.s1 f7=f12,f9,f7
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (13)
+ // H3 = H2 + d2 * H2 in f9
+ (p6) fma.s1 f9=f10,f9,f9
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (14)
+ // e3 = a - S3 * S3 in f12
+ (p6) fnma.s1 f12=f7,f7,f8
+ nop.i 0;;
+} { .mfb
+ nop.m 0
+ // Step (15)
+ // S = S3 + e3 * H3 in f8 (the return register)
+ (p6) fma.s0 f8=f12,f9,f7
+ (p6) br.ret.sptk b0 ;;
+}
+{ .mfb
+ (p0) mov GR_Parameter_TAG = 48 // Error tag for the error path
+ (p0) mov f8 = f7 // p6 clear: return value is the frsqrta result
+ (p8) br.ret.sptk b0 ;; // taken unless p7 is set
+}
+// The (p8) return above handles the special values that are not negative;
+// their result is the frsqrta(x) value copied from f7 into f8.
+// Execution only reaches this point with p7 set and continues into
+// __libm_error_region below.
+
+
+// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+.endp sqrtl#
+ASM_SIZE_DIRECTIVE(sqrtl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__sqrtl)
+ASM_SIZE_DIRECTIVE(__ieee754_sqrtl)
+#endif
+
+.proc __libm_error_region // Error path, entered by fall-through from sqrtl
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack, then advance to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/k_rem_pio2.c b/sysdeps/ia64/fpu/k_rem_pio2.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/k_rem_pio2.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/k_rem_pio2f.c b/sysdeps/ia64/fpu/k_rem_pio2f.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/k_rem_pio2f.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/k_rem_pio2l.c b/sysdeps/ia64/fpu/k_rem_pio2l.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/k_rem_pio2l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/libm-test-ulps b/sysdeps/ia64/fpu/libm-test-ulps
index 022113f..20510fa 100644
--- a/sysdeps/ia64/fpu/libm-test-ulps
+++ b/sysdeps/ia64/fpu/libm-test-ulps
@@ -1,31 +1,16 @@
# Begin of automatic generation
-# acos
-Test "acos (0.7) == 0.7953988301841435554":
-float: 1
-ifloat: 1
-ildouble: 1150
-ldouble: 1150
-
# acosh
Test "acosh (7) == 2.6339157938496334172":
ldouble: 1
ildouble: 1
# asin
-Test "asin (-0.5) == -pi/6":
-float: 2
-ifloat: 2
-Test "asin (0.5) == pi/6":
-float: 2
-ifloat: 2
Test "asin (0.7) == 0.77539749661075306374035335271498708":
-float: 2
-ifloat: 2
double: 1
idouble: 1
-ildouble: 1147
-ldouble: 1147
+ldouble: 1
+ildouble: 1
# asinh
Test "asinh (0.7) == 0.652666566082355786":
@@ -33,17 +18,9 @@ ildouble: 656
ldouble: 656
# atan
-Test "atan (0.7) == 0.6107259643892086165":
-ildouble: 549
-ldouble: 549
-
-# atan2
-Test "atan2 (0.4, 0.0003) == 1.5700463269355215718":
-ildouble: 1
-ldouble: 1
-Test "atan2 (0.7, 1) == 0.6107259643892086165":
-ildouble: 549
-ldouble: 549
+#Test "atan (0.7) == 0.6107259643892086165":
+#ildouble: 549
+#ldouble: 549
# atanh
Test "atanh (0.7) == 0.8673005276940531944":
@@ -98,8 +75,8 @@ double: 1
float: 7
idouble: 1
ifloat: 7
-ildouble: 5
-ldouble: 5
+ildouble: 6
+ldouble: 6
Test "Imaginary part of: cacosh (-2 - 3 i) == -1.9833870299165354323 + 2.1414491111159960199 i":
double: 1
float: 4
@@ -215,27 +192,9 @@ ildouble: 447
ldouble: 447
# cbrt
-Test "cbrt (-0.001) == -0.1":
-ildouble: 717
-ldouble: 717
-Test "cbrt (-27.0) == -3.0":
-double: 1
-idouble: 1
-ildouble: 948
-ldouble: 948
Test "cbrt (0.7) == 0.8879040017426007084":
double: 1
idouble: 1
-ildouble: 346
-ldouble: 346
-Test "cbrt (0.970299) == 0.99":
-double: 1
-idouble: 1
-ildouble: 306
-ldouble: 306
-Test "cbrt (8) == 2":
-ildouble: 191
-ldouble: 191
# ccos
Test "Real part of: ccos (-2 - 3 i) == -4.1896256909688072301 - 9.1092278937553365979 i":
@@ -453,15 +412,17 @@ ldouble: 0.25
# cosh
Test "cosh (0.7) == 1.255169005630943018":
-ildouble: 309
-ldouble: 309
+ildouble: 2
+ldouble: 2
# cpow
Test "Real part of: cpow (2 + 3 i, 4 + 0 i) == -119.0 - 120.0 i":
double: 1
-float: 4
+float: 5
idouble: 1
-ifloat: 4
+ifloat: 5
+ldouble: 1
+ildouble: 1
Test "Imaginary part of: cpow (2 + 3 i, 4 + 0 i) == -119.0 - 120.0 i":
float: 2
ifloat: 2
@@ -475,6 +436,9 @@ idouble: 1.104
ifloat: 2.5333
ildouble: 1
ldouble: 1
+Test "Real part of: cpow (2 + 0 i, 10 + 0 i) == 1024.0 + 0.0 i":
+ldouble: 1
+ildouble: 1
# csin
Test "Real part of: csin (0.7 + 1.2 i) == 1.1664563419657581376 + 1.1544997246948547371 i":
@@ -575,8 +539,8 @@ ldouble: 2
Test "Imaginary part of: ctanh (-2 - 3 i) == -0.9653858790221331242 + 0.0098843750383224937 i":
float: 1
ifloat: 1
-ildouble: 23
-ldouble: 23
+ildouble: 24
+ldouble: 24
Test "Real part of: ctanh (0 + pi/4 i) == 0.0 + 1.0 i":
Test "Imaginary part of: ctanh (0 + pi/4 i) == 0.0 + 1.0 i":
float: 1
@@ -655,6 +619,8 @@ float: 1
ifloat: 1
double: 1
idouble: 1
+ldouble: 1
+ildouble: 1
# fmod
Test "fmod (-6.5, -2.3) == -1.9":
@@ -906,21 +872,17 @@ ildouble: 725
ldouble: 725
# sin
-Test "sin (0.7) == 0.64421768723769105367":
-ildouble: 627
-ldouble: 627
+Test "sin (0.7) == 0.64421768723769105367261435139872014":
+ildouble: 1
+ldouble: 1
# sincos
-Test "sincos (0.7, &sin_res, &cos_res) puts 0.64421768723769105367 in sin_res":
-ildouble: 627
-ldouble: 627
+Test "sincos (0.7, &sin_res, &cos_res) puts 0.64421768723769105367261435139872014 in sin_res":
+ldouble: 1
+ildouble: 1
Test "sincos (0.7, &sin_res, &cos_res) puts 0.76484218728448842625585999019186495 in cos_res":
-float: 1
-ifloat: 1
double: 1
idouble: 1
-ildouble: 528
-ldouble: 528
Test "sincos (M_PI_6l*2.0, &sin_res, &cos_res) puts 0.5 in cos_res":
double: 1
float: 0.5
@@ -1005,6 +967,8 @@ float: 1
ifloat: 1
double: 2
idouble: 2
+ldouble: 2
+ildouble: 2
Test "y0 (1.0) == 0.088256964215676957983":
double: 2
float: 1
@@ -1028,6 +992,8 @@ float: 1
ifloat: 1
double: 1
idouble: 1
+ldouble: 1
+ildouble: 1
# y1
Test "y1 (0.1) == -6.4589510947020269877":
@@ -1174,17 +1140,11 @@ idouble: 1
ifloat: 1
# Maximal error of functions:
-Function: "acos":
-ildouble: 1149
-ldouble: 1149
-
Function: "asin":
-float: 2
-ifloat: 2
double: 1
idouble: 1
-ildouble: 1147
-ldouble: 1147
+ldouble: 1
+ildouble: 1
Function: "asinh":
double: 1
@@ -1192,14 +1152,6 @@ idouble: 1
ildouble: 656
ldouble: 656
-Function: "atan":
-ildouble: 549
-ldouble: 549
-
-Function: "atan2":
-ildouble: 549
-ldouble: 549
-
Function: "atanh":
double: 1
idouble: 1
@@ -1305,8 +1257,6 @@ ldouble: 447
Function: "cbrt":
double: 1
idouble: 1
-ildouble: 948
-ldouble: 948
Function: Real part of "ccos":
double: 1
@@ -1389,22 +1339,24 @@ ildouble: 529
ldouble: 529
Function: "cosh":
-ildouble: 309
-ldouble: 309
+ildouble: 2
+ldouble: 2
Function: Real part of "cpow":
double: 1
-float: 4
+float: 5
idouble: 1
-ifloat: 4
+ifloat: 5
+ldouble: 1
+ildouble: 1
Function: Imaginary part of "cpow":
double: 1.104
float: 2.5333
idouble: 1.104
ifloat: 2.5333
-ildouble: 2
-ldouble: 2
+ildouble: 4
+ldouble: 4
Function: Real part of "csin":
float: 1
@@ -1639,6 +1591,8 @@ double: 2
float: 1
idouble: 2
ifloat: 1
+ldouble: 2
+ildouble: 2
Function: "y1":
double: 3
diff --git a/sysdeps/ia64/fpu/libm_atan2_reg.S b/sysdeps/ia64/fpu/libm_atan2_reg.S
new file mode 100644
index 0000000..7a0c703
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_atan2_reg.S
@@ -0,0 +1,1221 @@
+.file "libm_atan2_reg.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00 Unwind support added
+
+#include "libm_support.h"
+
+.data
+
+.align 64
+ASM_TYPE_DIRECTIVE(Constants_atan#,@object)
+Constants_atan: // Constant pool addressed by __libm_atan2_reg through @ltoff
+data4 0x54442D18, 0x3FF921FB, 0x248D3132, 0x3E000000
+// Row above: double pi/2, single lo_pi/2, single two**(-3)
+data4 0xAAAAAAA3, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // P_1
+data4 0xCCCC54B2, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // P_2
+data4 0x47E4D0C2, 0x92492492, 0x0000BFFC, 0x00000000 // P_3
+data4 0x58870889, 0xE38E38E0, 0x00003FFB, 0x00000000 // P_4
+data4 0x290149F8, 0xBA2E895B, 0x0000BFFB, 0x00000000 // P_5
+data4 0x250F733D, 0x9D88E6D4, 0x00003FFB, 0x00000000 // P_6
+data4 0xFB8745A0, 0x884E51FF, 0x0000BFFB, 0x00000000 // P_7
+data4 0x394396BD, 0xE1C7412B, 0x00003FFA, 0x00000000 // P_8
+data4 0xAAAAA52F, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // Q_1
+data4 0xC75B60D3, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // Q_2
+data4 0x011F1940, 0x924923AD, 0x0000BFFC, 0x00000000 // Q_3
+data4 0x2A5F89BD, 0xE36F716D, 0x00003FFB, 0x00000000 // Q_4
+// Entries Tbl_hi (double precision)
+// B = 1+Index/16+1/32 Index = 0
+// Entries Tbl_lo (single precision)
+// B = 1+Index/16+1/32 Index = 0
+data4 0xA935BD8E, 0x3FE9A000, 0x23ACA08F, 0x00000000
+// Entries Tbl_hi (double precision) Index = 0,1,...,15
+// B = 2^(-1)*(1+Index/16+1/32)
+// Entries Tbl_lo (single precision)
+// Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
+data4 0x7F175A34, 0x3FDE77EB, 0x238729EE, 0x00000000
+data4 0x73C1A40B, 0x3FE0039C, 0x249334DB, 0x00000000
+data4 0x5B5B43DA, 0x3FE0C614, 0x22CBA7D1, 0x00000000
+data4 0x88BE7C13, 0x3FE1835A, 0x246310E7, 0x00000000
+data4 0xE2CC9E6A, 0x3FE23B71, 0x236210E5, 0x00000000
+data4 0x8406CBCA, 0x3FE2EE62, 0x2462EAF5, 0x00000000
+data4 0x1CD41719, 0x3FE39C39, 0x24B73EF3, 0x00000000
+data4 0x5B795B55, 0x3FE44506, 0x24C11260, 0x00000000
+data4 0x5BB6EC04, 0x3FE4E8DE, 0x242519EE, 0x00000000
+data4 0x1F732FBA, 0x3FE587D8, 0x24D4346C, 0x00000000
+data4 0x115D7B8D, 0x3FE6220D, 0x24ED487B, 0x00000000
+data4 0x920B3D98, 0x3FE6B798, 0x2495FF1E, 0x00000000
+data4 0x8FBA8E0F, 0x3FE74897, 0x223D9531, 0x00000000
+data4 0x289FA093, 0x3FE7D528, 0x242B0411, 0x00000000
+data4 0x576CC2C5, 0x3FE85D69, 0x2335B374, 0x00000000
+data4 0xA99CC05D, 0x3FE8E17A, 0x24C27CFB, 0x00000000
+//
+// Entries Tbl_hi (double precision) Index = 0,1,...,15
+// B = 2^(-2)*(1+Index/16+1/32)
+// Entries Tbl_lo (single precision)
+// Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
+//
+data4 0x510665B5, 0x3FD025FA, 0x24263482, 0x00000000
+data4 0x362431C9, 0x3FD1151A, 0x242C8DC9, 0x00000000
+data4 0x67E47C95, 0x3FD20255, 0x245CF9BA, 0x00000000
+data4 0x7A823CFE, 0x3FD2ED98, 0x235C892C, 0x00000000
+data4 0x29271134, 0x3FD3D6D1, 0x2389BE52, 0x00000000
+data4 0x586890E6, 0x3FD4BDEE, 0x24436471, 0x00000000
+data4 0x175E0F4E, 0x3FD5A2E0, 0x2389DBD4, 0x00000000
+data4 0x9F5FA6FD, 0x3FD68597, 0x2476D43F, 0x00000000
+data4 0x52817501, 0x3FD76607, 0x24711774, 0x00000000
+data4 0xB8DF95D7, 0x3FD84422, 0x23EBB501, 0x00000000
+data4 0x7CD0C662, 0x3FD91FDE, 0x23883A0C, 0x00000000
+data4 0x66168001, 0x3FD9F930, 0x240DF63F, 0x00000000
+data4 0x5422058B, 0x3FDAD00F, 0x23FE261A, 0x00000000
+data4 0x378624A5, 0x3FDBA473, 0x23A8CD0E, 0x00000000
+data4 0x0AAD71F8, 0x3FDC7655, 0x2422D1D0, 0x00000000
+data4 0xC9EC862B, 0x3FDD45AE, 0x2344A109, 0x00000000
+//
+// Entries Tbl_hi (double precision) Index = 0,1,...,15
+// B = 2^(-3)*(1+Index/16+1/32)
+// Entries Tbl_lo (single precision)
+// Index = 0,1,...,15 B = 2^(-3)*(1+Index/16+1/32)
+//
+data4 0x84212B3D, 0x3FC068D5, 0x239874B6, 0x00000000
+data4 0x41060850, 0x3FC16465, 0x2335E774, 0x00000000
+data4 0x171A535C, 0x3FC25F6E, 0x233E36BE, 0x00000000
+data4 0xEDEB99A3, 0x3FC359E8, 0x239680A3, 0x00000000
+data4 0xC6092A9E, 0x3FC453CE, 0x230FB29E, 0x00000000
+data4 0xBA11570A, 0x3FC54D18, 0x230C1418, 0x00000000
+data4 0xFFB3AA73, 0x3FC645BF, 0x23F0564A, 0x00000000
+data4 0xE8A7D201, 0x3FC73DBD, 0x23D4A5E1, 0x00000000
+data4 0xE398EBC7, 0x3FC8350B, 0x23D4ADDA, 0x00000000
+data4 0x7D050271, 0x3FC92BA3, 0x23BCB085, 0x00000000
+data4 0x601081A5, 0x3FCA217E, 0x23BC841D, 0x00000000
+data4 0x574D780B, 0x3FCB1696, 0x23CF4A8E, 0x00000000
+data4 0x4D768466, 0x3FCC0AE5, 0x23BECC90, 0x00000000
+data4 0x4E1D5395, 0x3FCCFE65, 0x2323DCD2, 0x00000000
+data4 0x864C9D9D, 0x3FCDF110, 0x23F53F3A, 0x00000000
+data4 0x451D980C, 0x3FCEE2E1, 0x23CCB11F, 0x00000000
+data4 0x54442D18, 0x400921FB, 0x33145C07, 0x3CA1A626 // pi as two doubles (hi, lo)
+data4 0x54442D18, 0x3FF921FB, 0x33145C07, 0x3C91A626 // pi/2 as two doubles (hi, lo)
+data4 0x54442D18, 0x3FE921FB, 0x33145C07, 0x3C81A626 // pi/4 as two doubles (hi, lo)
+data4 0x7F3321D2, 0x4002D97C, 0x4C9E8A0A, 0x3C9A7939 // 3*pi/4 as two doubles (hi, lo)
+ASM_SIZE_DIRECTIVE(Constants_atan#)
+.section .text
+
+.proc __libm_atan2_reg#
+.global __libm_atan2_reg#
+.align 64
+__libm_atan2_reg:
+
+
+{ .mfi
+ alloc r32 = ar.pfs,0,20,4,0
+(p0) mov f32 = f8
+ nop.i 0
+}
+{ .mmi
+ nop.m 0
+(p0) addl r39 = @ltoff(Constants_atan#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r39 = [r39]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop 999 // EMbo added ...
+(p0) mov f33 = f9
+ nop.i 0
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.nm.unc p9,p0 = f32 ,0x1FF
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.nm.unc p8,p0 = f33 ,0x1FF
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p6,p0 = f33 ,0x103
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p7,p0 = f32 ,0x103
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p12,p0 = f33 ,0x0C3
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// Check for NatVals.
+// Check for EM Unsupporteds
+// Check for NaNs.
+//
+(p0) fclass.m.unc p13,p0 = f32 ,0x0C3
+(p6) br.cond.sptk L(ATAN_NATVAL);;
+ } { .mbb
+ nop 999 // EMbo added ...
+(p7) br.cond.sptk L(ATAN_NATVAL)
+(p8) br.cond.sptk L(ATAN_UNSUPPORTED);;
+ } { .mib
+(p0) add r40 = 96, r39
+ nop 999 // EMbo added ...
+(p9) br.cond.sptk L(ATAN_UNSUPPORTED);;
+ } { .mib
+(p0) ldfd f50 = [r39],8
+ nop 999 // EMbo added ...
+(p12) br.cond.sptk L(ATAN_NAN);;
+ } { .mfb
+ nop 999 // EMbo added ...
+(p0) fnorm.s1 f33 = f33
+(p13) br.cond.sptk L(ATAN_NAN);;
+ } { .mfi
+(p0) ldfs f51 = [r39],4
+//
+// Remove sign bits from exponents
+// Load 2**(-3)
+// Normalize the input argument.
+//
+(p0) fnorm.s1 f32 = f32
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) mov f82 = f1
+ nop 999;; // EMbo added ...
+ } { .mmi
+ nop 999;; // EMbo added ...
+(p0) ldfs f78 = [r39],180
+ nop 999;; // EMbo added ...
+ } { .mmi
+(p0) getf.exp r36 = f33;;
+//
+// Get exp and sign of ArgX
+// Get exp and sign of ArgY
+// Load 2**(-3) and increment ptr to Q_4.
+//
+(p0) getf.exp r37 = f32
+(p0) shr.u r36 = r36,17;;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmerge.s f84 = f1,f32
+(p0) shr.u r37 = r37,17;;
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// ArgX_abs = |ArgX|
+// ArgY_abs = |ArgY|
+// sign_X is sign bit of ArgX
+// sign_Y is sign bit of ArgY
+//
+(p0) fmerge.s f83 = f1,f33
+(p0) cmp.eq.unc p8,p9 = 0x00000, r37;;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p8) fadd.s1 f34 = f0, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p9) fsub.s1 f34 = f0, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmin.s1 f36 = f83, f84
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmax.s1 f35 = f83, f84
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// Is ArgX_abs >= ArgY_abs
+// Is sign_Y == 0?
+//
+(p0) fcmp.ge.s1 p6,p7 = f83,f84
+ nop 999;; // EMbo added ...
+ } { .mii
+(p6) cmp.eq.unc p10, p11 = 0x00000, r36
+(p6) add r38 = r0, r0;;
+//
+// U = max(ArgX_abs,ArgY_abs)
+// V = min(ArgX_abs,ArgY_abs)
+// if p6, swap = 0
+// if p7, swap = 1
+//
+//
+// Let M = 1.0
+// if p8, s_Y = 1.0
+// if p9, s_Y = -1.0
+//
+(p7) add r38 = 1,r0;;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) frcpa.s1 f37, p6 = f36, f35
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// E = frcpa(V,U)
+//
+(p10) fsub.s1 f82 = f82, f1
+(p6) br.cond.sptk L(ATAN_STEP2);;
+ } { .mib
+ nop 999 // EMbo added ...
+ nop 999 // EMbo added ...
+// /**************************************************/
+// /********************* STEP2 **********************/
+// /**************************************************/
+(p0) br.cond.spnt L(ATAN_SPECIAL_HANDLING);;
+ }
+L(ATAN_STEP2):
+ { .mlx
+ nop 999 // EMbo added ...
+(p0) movl r47 = 0x8400000000000000
+ } { .mlx
+ nop 999 // EMbo added ...
+(p0) movl r48 = 0x0000000000000100;;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f38 = f37, f36
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fcmp.lt.unc.s0 p0,p9 = f9,f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fcmp.lt.unc.s0 p0,p8 = f8,f1
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// Q = E * V
+//
+(p11) fadd.s1 f82 = f82, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+(p0) getf.sig r46 = f38
+(p0) fcmp.lt.unc p6,p7 = f38,f78
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f38 = f37, f36
+(p0) extr.u r42 = r46, 59, 4;;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f50 = f82, f50
+(p0) dep r47 = r42, r47, 59, 4
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f51 = f82, f51
+ nop 999;; // EMbo added ...
+ } { .mmi
+ nop 999;; // EMbo added ...
+//
+// Is Q < 2**(-3)?
+//
+//
+// Do fcmp to raise any denormal operand
+// exceptions.
+//
+(p0) getf.exp r45 = f38
+ nop 999;; // EMbo added ...
+ } { .mib
+//
+// lookup = b_1 b_2 b_3 B_4
+//
+//
+// Generate 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+//
+(p0) andcm r41 = 0x0003, r45
+ nop 999 // EMbo added ...
+//
+// We waited a few extra cycles so P_lo and P_hi could be calculated.
+// Load the constant 256 for loading up table entries.
+//
+// /**************************************************/
+// /********************* STEP3 **********************/
+// /**************************************************/
+(p6) br.cond.spnt L(ATAN_POLY);;
+ } { .mii
+(p0) setf.sig f39 = r47
+(p0) cmp.eq.unc p8, p9 = 0x0000, r41
+//
+// z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+// point to beginning of Tbl_hi entries - k = 0.
+//
+(p0) add r40 = 16, r39
+ } { .mmi
+(p0) ldfe f73 = [r39],-16;;
+(p9) sub r41 = r41,r0,1
+(p9) add r40 = 16,r40
+ } { .mfi
+(p8) ldfd f48 = [r40],8
+(p0) fmpy.s1 f50 = f34, f50
+(p0) xor r38 = r36,r38;;
+ } { .mmi
+(p0) ldfe f71 = [r39],-16;;
+(p8) ldfs f49 = [r40],8
+(p9) pmpy2.r r41 = r41,r48;;
+ } { .mfi
+(p0) ldfe f69 = [r39],-16
+//
+// Let z_hi have exponent and sign of original Q
+// Load the Tbl_hi(0) else, increment pointer.
+//
+(p0) fmerge.se f39 = f38,f39
+(p9) shladd r42 = r42,0x0004,r41;;
+ } { .mmi
+(p9) add r40 = r40, r42;;
+(p9) ldfd f48 = [r40],8
+ nop 999;; // EMbo added ...
+ } { .mmi
+(p0) ldfe f67 = [r39],-16;;
+(p9) ldfs f49 = [r40],8
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// U_prime_hi = U + V * z_hi
+// Load the Tbl_lo(0)
+//
+(p0) fma.s1 f40 = f36, f39, f35
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fnma.s1 f42 = f35, f39, f36
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) mov f52 = f48
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) frcpa.s1 f43, p6 = f1, f40
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// U_prime_lo = U - U_prime_hi
+// k = k * 256 - result can be 0, 256, or 512.
+//
+(p0) fsub.s1 f41 = f35, f40
+(p0) cmp.eq.unc p7, p6 = 0x00000, r38
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f52 = f34, f52
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p7) fadd.s1 f54 = f0, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fsub.s1 f54 = f0, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fnma.s1 f80 = f43, f40, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fadd.s1 f79 = f41, f40
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f41 = f36, f39, f41
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f56 = f54, f52, f50
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f43 = f80, f43, f43
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// U_prime_lo = U - U_hold
+// lookup -> lookup * 16 + k
+//
+//
+// V_prime = V - U * z_hi
+// U_prime_lo = V * z_hi + U_prime_lo
+//
+(p0) fsub.s1 f79 = f35, f79
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fnma.s1 f80 = f43, f40, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// C_hi = frcpa(1,U_prime_hi)
+// U_prime_lo = U_prime_lo + U_hold
+//
+//
+// C_hi_hold = 1 - C_hi * U_prime_hi (1)
+//
+//
+// C_hi = C_hi + C_hi * C_hi_hold (1)
+//
+//
+// C_hi_hold = 1 - C_hi * U_prime_hi (2)
+//
+(p0) fadd.s1 f41 = f41, f79
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// C_hi = C_hi + C_hi * C_hi_hold (2)
+//
+(p0) fma.s1 f43 = f80, f43, f43
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// C_hi_hold = 1 - C_hi * U_prime_hi (3)
+//
+(p0) fnma.s1 f80 = f43, f40, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// C_hi = C_hi + C_hi * C_hi_hold (3)
+//
+(p0) fma.s1 f43 = f80, f43, f43
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// w_hi = V_prime * C_hi
+//
+(p0) fmpy.s1 f44 = f42, f43
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f46 = f44, f44
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// wsq = w_hi * w_hi
+// w_lo = V_prime - w_hi * U_prime_hi
+//
+(p0) fnma.s1 f45 = f44, f40, f42
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f47 = f46, f73, f71
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly = Q_3 + wsq * Q_4
+// w_lo = w_lo - w_hi * U_prime_lo
+//
+(p0) fnma.s1 f45 = f44, f41, f45
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f47 = f46, f47, f69
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly = Q_2 + wsq * poly
+// w_lo = w_lo * C_hi
+//
+(p0) fmpy.s1 f45 = f43, f45
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f47 = f46, f47, f67
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly = Q_1 + wsq * poly
+// A_lo = Tbl_lo + w_lo
+// swap = xor(swap,sign_X)
+//
+(p0) fadd.s1 f53 = f49, f45
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// Is (swap) != 0 ?
+// poly = wsq * poly
+// A_hi = Tbl_hi
+//
+(p0) fmpy.s1 f47 = f46, f47
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly = wsq * poly
+//
+//
+// if (p6) sigma = -1.0
+// if (p7) sigma = 1.0
+//
+(p0) fmpy.s1 f47 = f44, f47
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// P_hi = s_Y * P_hi
+// A_lo = A_lo + poly
+//
+(p0) fadd.s1 f53 = f53, f47
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// A_lo = A_lo + w_hi
+// A_hi = s_Y * A_hi
+//
+(p0) fadd.s1 f53 = f53, f44
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// result_hi = P_hi + sigma * A_hi
+// result_lo = P_lo + sigma * A_lo
+//
+(p0) fma.s1 f55 = f54, f53, f51
+(p0) br.cond.sptk L(RETURN_ATAN);;
+}
+//
+// result = result_hi + result_lo * s_Y (User Supplied Rounding Mode)
+//
+// (p0) fma.d.s0 f57 = f55, f34, f56
+//
+// /**************************************************/
+// /********************* STEP4 **********************/
+// /**************************************************/
+//
+L(ATAN_POLY):
+{ .mmi
+(p0) xor r38 = r36,r38
+(p0) addl r39 = @ltoff(Constants_atan#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r39 = [r39]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mlx
+ nop 999 // EMbo added ...
+(p0) movl r47 = 0x24005;;
+ } { .mfi
+(p0) add r39 = 128, r39
+(p0) fnma.s1 f81 = f37, f35, f1
+(p0) cmp.eq.unc p7, p6 = 0x00000, r38;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f77 = [r39],-16
+//
+// Iterate 3 times E = E + E*(1.0 - E*U)
+// Also load P_8, P_7, P_6, P_5, P_4
+// E_hold = 1.0 - E * U (1)
+// A_temp = Q
+//
+(p0) mov f85 = f38;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f76 = [r39],-16
+(p6) fsub.s1 f54 = f0, f1;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f75 = [r39],-16
+//
+// E = E + E_hold*E (1)
+// Point to P_8.
+//
+(p0) fma.s1 f37 = f37, f81, f37;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f74 = [r39],-16
+(p0) fnma.s1 f64 = f85, f35, f36;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f72 = [r39],-16
+(p7) fadd.s1 f54 = f0, f1;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f70 = [r39],-16
+//
+// E_hold = 1.0 - E * U (2)
+//
+(p0) fnma.s1 f81 = f37, f35, f1;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f68 = [r39],-16
+(p0) fmpy.s1 f50 = f34, f50;;
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfe f66 = [r39],-16
+(p0) fmpy.d.s0 f67 = f67, f67
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// E = E + E_hold*E (2)
+//
+(p0) fma.s1 f37 = f37, f81, f37
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// E_hold = 1.0 - E * U (3)
+//
+(p0) fnma.s1 f81 = f37, f35, f1
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// E = E + E_hold*E (3)
+// At this point E approximates 1/U to roughly working precision
+// z = V*E approximates V/U
+//
+(p0) fma.s1 f37 = f37, f81, f37
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// z = V * E
+//
+(p0) fmpy.s1 f59 = f36, f37
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f64 = f64, f37
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// zsq = z * z
+// Also load P_3
+//
+(p0) fmpy.s1 f60 = f59, f59
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fadd.s1 f52 = f85, f64
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f62 = f60, f77, f76
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f63 = f60, f70, f68
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// z8 = zsq * zsq
+// Also load P_2
+//
+(p0) fmpy.s1 f61 = f60, f60
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fsub.s1 f85 = f85, f52
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmerge.s f65 = f52,f52
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f62 = f60, f62, f75
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f63 = f60, f63, f66
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// z8 = z8 * z8
+// Also load P_1
+// poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
+// poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
+//
+//
+// poly1 = P_7 + zsq * P_8
+// poly2 = P_2 + zsq * P_3
+// poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*poly1))
+// poly2 = zsq*(P_1 + zsq*poly2)
+//
+//
+// poly1 = P_6 + zsq * poly1
+// poly2 = P_1 + zsq * poly2
+// poly1 = P_4 + zsq*(P_5 + zsq*poly1)
+// poly2 = zsq*poly2
+//
+(p0) fmpy.s1 f61 = f61, f61
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fadd.s1 f64 = f85, f64
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f62 = f60, f62, f74
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly1 = P_5 + zsq * poly1
+// poly2 = zsq * poly2
+// poly1 = P_4 + zsq*poly1
+//
+(p0) fmpy.s1 f63 = f63, f60
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly1 = P_4 + zsq * poly1
+// swap = xor(swap,sign_X)
+//
+(p0) fma.s1 f62 = f60, f62, f72
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// poly = z8*poly1 + poly2 (Typo in writeup)
+// Is (swap) != 0 ?
+//
+//
+// z_lo = V - A_temp * U
+// if (p7) sigma = 1.0
+// Writeup shows A_temp as A_hi
+//
+//
+// z_lo = z_lo * E
+// if (p6) sigma = -1.0
+// z_lo = (V - A_temp * U) *E
+//
+//
+// Fixup added to force inexact later -
+// A_hi = A_temp + z_lo
+// z_lo = (A_temp - A_hi) + z_lo
+// z_lo = A_hi - z_lo -A_hi + z_lo = about 0
+//
+(p0) fma.s1 f47 = f61, f62, f63
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// A_lo = z * poly + z_lo
+//
+(p0) fma.s1 f53 = f59, f47, f64
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fadd.s1 f52 = f65, f53
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fsub.s1 f65 = f65, f52
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fmpy.s1 f52 = f34, f52
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fadd.s1 f53 = f65, f53
+ nop 999 // EMbo added ...
+ } { .mfi
+(p0) setf.exp f65 = r47
+(p0) fma.s1 f56 = f54, f52, f50
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p6,p0 = f53,0x007
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// P_hi = s_Y * P_hi
+// A_hi = s_Y * A_hi
+//
+//
+// result_hi = P_hi + sigma * A_hi
+//
+(p6) mov f53 = f65
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// tmp = P_hi - result_hi
+//
+(p0) fsub.s1 f65 = f50, f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fma.s1 f65 = f52, f54, f65
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// tmp = sigma * A_hi + tmp
+// sigma = A_lo * sigma + P_lo
+//
+(p0) fma.s1 f54 = f53, f54, f51
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// result_lo = s_Y * sigma + tmp
+//
+(p0) fma.s1 f55 = f34, f54, f65
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop.m 0
+ mov f34 = f1
+(p0) br.cond.sptk L(RETURN_ATAN);;
+}
+//
+// result = result_hi + result_lo (User Supplied Rounding Mode)
+//
+// (p0) fadd.d.s0 f57 = f55, f56
+L(ATAN_UNSUPPORTED):
+L(ATAN_NATVAL):
+ { .mfb
+ nop 999 // EMbo added ...
+//
+// Deal with the NatVal and unsupported cases.
+// Raise invalid if warranted.
+//
+(p0) fmpy.d.s0 f57 = f8, f9
+br.cond.sptk L(RETURN_ATAN);;
+ }
+L(ATAN_NAN):
+ { .mfb
+ nop 999 // EMbo added ...
+//
+// If only one NaN, then generate the resulting
+// NaN and return - may raise invalid.
+//
+(p0) fmpy.d.s0 f57 = f8, f9
+(p0) br.cond.sptk L(RETURN_ATAN);;
+ }
+L(ATAN_SPECIAL_HANDLING):
+
+ { .mmf
+(p0) addl r39 = @ltoff(Constants_atan#), gp
+ nop.m 999
+(p0) fcmp.lt.s0 p0,p7 = f8,f1
+ }
+;;
+
+//
+// Raise denormal operand faults if necessary
+//
+
+{ .mfi
+ ld8 r39 = [r39]
+(p0) fcmp.lt.s0 p0,p6 = f9,f1
+ nop 999;; // EMbo added ...
+}
+;;
+
+
+
+{ .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p6,p7 = f32,0x007
+ nop 999;; // EMbo added ...
+ } { .mlx
+ nop 999 // EMbo added ...
+(p0) movl r47 = 992;;
+ } { .mib
+(p0) add r39 = r39, r47
+ nop 999 // EMbo added ...
+(p7) br.cond.sptk L(ATAN_ArgY_Not_ZERO);;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fclass.m.unc p14,p0 = f33,0x035
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fclass.m.unc p15,p0 = f33,0x036
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fclass.m.unc p13,p0 = f33,0x007
+ nop 999 // EMbo added ...
+ } { .mfi
+(p0) ldfd f56 = [r39],8
+ nop 999 // EMbo added ...
+ nop 999;; // EMbo added ...
+ } { .mfi
+(p0) ldfd f55 = [r39],-8
+(p14) fmerge.s f56 = f32,f0
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// Return sign_Y * 0 when Y = +/-0 and X > 0
+//
+(p14) fmerge.s f55 = f32,f0
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p15) fmerge.s f56 = f32,f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// Return sign_Y * PI when X < -0
+//
+//
+(p15) fmerge.s f55 = f32,f55
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fadd.d.s0 f57 = f56,f55
+ nop.i 0
+ } { .bbb
+//
+// Call error support function for atan(0,0)
+// - expected value already computed.
+//
+ nop.b 0
+ nop.b 0
+(p0) br.cond.sptk L(RETURN_ATAN)
+ }
+L(ATAN_ArgY_Not_ZERO):
+ { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p9,p10 = f32,0x023
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+(p9) fclass.m.unc p6,p0 = f33,0x017
+(p10) br.cond.sptk L(ATAN_ArgY_Not_INF);;
+ } { .mfi
+(p6) add r39 = 16,r39
+(p9) fclass.m.unc p7,p0 = f33,0x021
+ nop 999;; // EMbo added ...
+ } { .mmf
+ nop 999 // EMbo added ...
+(p0) ldfd f56 = [r39],8
+(p9) fclass.m.unc p8,p0 = f33,0x022;;
+ } { .mbb
+(p0) ldfd f55 = [r39],-8
+ nop 999 // EMbo added ...
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fmerge.s f56 = f32,f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fmerge.s f55 = f32,f55
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// Load PI/2 and adjust its sign.
+// Return +PI/2 when ArgY = +Inf and ArgX = +/-0,normal
+// Return -PI/2 when ArgY = -Inf and ArgX = +/-0,normal
+//
+(p6) fadd.d.s0 f57 = f56, f55
+(p6) br.cond.sptk L(RETURN_ATAN);;
+ } { .mmi
+(p7) add r39 = 32,r39;;
+(p7) ldfd f56 = [r39],8
+ nop 999;; // EMbo added ...
+ } { .mmi
+ nop 999;; // EMbo added ...
+(p7) ldfd f55 = [r39],-8
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p7) fmerge.s f56 = f32,f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p7) fmerge.s f55 = f32,f55
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// Load PI/4 and adjust its sign.
+// Return +PI/4 when ArgY = +Inf and ArgX = +Inf
+// Return -PI/4 when ArgY = -Inf and ArgX = +Inf
+//
+(p7) fadd.d.s0 f57 = f56, f55
+(p7) br.cond.sptk L(RETURN_ATAN);;
+ } { .mmi
+(p8) add r39 = 48,r39;;
+(p8) ldfd f56 =[r39],8
+ nop 999;; // EMbo added ...
+ } { .mmi
+ nop 999;; // EMbo added ...
+(p8) ldfd f55 =[r39],-8
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p8) fmerge.s f56 = f32,f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p8) fmerge.s f55 = f32,f55
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// Load 3*PI/4 and adjust its sign.
+// Return +3*PI/4 when ArgY = +Inf and ArgX = -Inf
+// Return -3*PI/4 when ArgY = -Inf and ArgX = -Inf
+//
+(p8) fadd.d.s0 f57 = f56, f55
+(p8) br.cond.sptk L(RETURN_ATAN);;
+ }
+L(ATAN_ArgY_Not_INF):
+ { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p6,p0 = f33,0x007
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p7,p0 = f33,0x021
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p0) fclass.m.unc p8,p0 = f33,0x022
+(p6) add r39 = 16,r39;;
+ } { .mfi
+(p6) ldfd f56 =[r39],8
+ nop 999 // EMbo added ...
+ nop 999;; // EMbo added ...
+ } { .mmi
+ nop 999;; // EMbo added ...
+(p6) ldfd f55 =[r39],-8
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fmerge.s f56 = f32,f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p6) fmerge.s f55 = f32,f55
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// return = sign_Y * PI/2 when ArgX = +/-0
+//
+(p6) fadd.d.s0 f57 = f56, f55
+(p6) br.cond.sptk L(RETURN_ATAN);;
+ } { .mfi
+ nop 999 // EMbo added ...
+(p7) fmerge.s f56 = f32,f0
+ nop 999 // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p7) fmerge.s f55 = f32,f0
+ nop 999;; // EMbo added ...
+ } { .mfb
+ nop 999 // EMbo added ...
+//
+// return = sign_Y * 0 when ArgX = Inf
+//
+(p7) fadd.d.s0 f57 = f56, f55
+(p7) br.cond.sptk L(RETURN_ATAN);;
+ } { .mfi
+(p8) ldfd f56 = [r39],8
+ nop 999 // EMbo added ...
+ nop 999;; // EMbo added ...
+ } { .mmi
+ nop 999;; // EMbo added ...
+(p8) ldfd f55 = [r39],-8
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p8) fmerge.s f56 = f32,f56
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+(p8) fmerge.s f55 = f32,f55
+ nop 999;; // EMbo added ...
+ } { .mfi
+ nop 999 // EMbo added ...
+//
+// return = sign_Y * PI when ArgX = -Inf
+//
+(p8) fadd.d.s0 f57 = f56, f55
+ nop 999 // EMbo added ...
+ };;
+L(RETURN_ATAN):
+// mov f8 = f57 ;;
+// The answer is in f57.
+// But Z_hi is f56
+// Z_lo is f55
+// s_Y is f34
+// W is in f9 and untouched
+
+{ .mfi
+ nop 999
+mov f8 = f56
+ nop.i 0
+};;
+
+{ .mfi
+ nop 999
+mov f10 = f55
+ nop.i 999
+}
+{ .mfb
+ nop 999
+mov f11 = f34
+br.ret.sptk b0
+};;
+
+.endp __libm_atan2_reg
+ASM_SIZE_DIRECTIVE(__libm_atan2_reg)
diff --git a/sysdeps/ia64/fpu/libm_error.c b/sysdeps/ia64/fpu/libm_error.c
new file mode 100644
index 0000000..26916fd
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_error.c
@@ -0,0 +1,3545 @@
+//
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, James
+// Edwards, and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 3/22/00: Updated to support flexible and dynamic error handling.
+// 8/16/00: Changed all matherr function-calls to use the pmatherr
+// function-pointers.
+// 10/03/00: Corrected a scalb type.
+// 11/28/00: Changed INPUT_XL to INPUT_XD for scalb_underflow case.
+// 12/07/00: Added code to make scalbn error support equivalent to ldexp.
+// 2/07/01: Added __declspec(align(16)) to long double constants to correct
+// alignment problem.
+//
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "libm_support.h"
+
+#ifndef _LIBC /* compiled only when _LIBC is not defined: define _LIB_VERSION in this file */
+_LIB_VERSION_TYPE /* start of one definition; exactly one "_LIB_VERSION = ...;" line below completes it (macro presumably from libm_support.h -- confirm) */
+#if defined( __POSIX__ )
+_LIB_VERSION = _POSIX_;
+#elif defined( __XOPEN__ )
+_LIB_VERSION = _XOPEN_;
+#elif defined( __SVID__ )
+_LIB_VERSION = _SVID_;
+#elif defined( __IEEE__ )
+_LIB_VERSION = _IEEE_;
+#else /* none of the mode macros above is defined: initialize to _ISOC_ */
+_LIB_VERSION = _ISOC_;
+#endif
+#endif /* !_LIBC */
+
+/************************************************************/
+/* matherrX function pointers and setusermatherrX functions */
+/************************************************************/
+#if 0 /* compiled out: matherr hook pointers and their setters; each setter restores the default handler when passed NULL */
+int (*pmatherrf)(struct exceptionf*) = MATHERR_F;
+int (*pmatherr)(struct EXC_DECL_D*) = MATHERR_D;
+int (*pmatherrl)(struct exceptionl*) = matherrl;
+
+void __libm_setusermatherrf( int(*user_merrf)(struct exceptionf*) )
+{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); }
+
+void __libm_setusermatherr( int(*user_merr)(struct EXC_DECL_D*) )
+{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); }
+
+void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) )
+{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); }
+#endif /* 0 */
+
+/***********************************************/
+/* error-handling function, libm_error_support */
+/***********************************************/
+void __libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
+{
+
+
+# ifdef __cplusplus
+struct __exception exc;
+# else
+struct exception exc;
+# endif
+
+struct exceptionf excf;
+struct exceptionl excl;
+
+# if defined opensource || defined _LIBC
+#define ALIGNIT
+#define ALIGNATTR __attribute__ ((__aligned__ (16)))
+# else
+#define ALIGNIT __declspec(align(16))
+#define ALIGNATTR
+# endif
+
+const char float_inf[4] = {0x00,0x00,0x80,0x7F};
+const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
+const char float_zero[4] = {0x00,0x00,0x00,0x00};
+const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
+const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
+const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
+ALIGNIT
+const char double_inf[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
+ALIGNIT
+//const char double_huge[8] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
+ALIGNIT
+const char double_zero[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+ALIGNIT
+const char double_neg_inf[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
+ALIGNIT
+//const char double_neg_huge[8] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
+ALIGNIT
+const char double_neg_zero[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
+ALIGNIT
+const char long_double_inf[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+ALIGNIT
+//const char long_double_huge[16] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+ALIGNIT
+const char long_double_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+ALIGNIT
+const char long_double_neg_inf[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+ALIGNIT
+//const char long_double_neg_huge[16] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+ALIGNIT
+const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+
+#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf
+#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
+#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge
+#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge
+
+#define RETVAL_HUGE_VALD *(double *)retval = *(double *) double_inf
+#define RETVAL_NEG_HUGE_VALD *(double *)retval = *(double *) double_neg_inf
+#define RETVAL_HUGED *(double *)retval = (double) *(float *)float_huge
+#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge
+
+#define RETVAL_HUGE_VALF *(float *)retval = *(float *) float_inf
+#define RETVAL_NEG_HUGE_VALF *(float *)retval = *(float *) float_neg_inf
+#define RETVAL_HUGEF *(float *)retval = *(float *) float_huge
+#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
+
+#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
+#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
+#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
+
+#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero
+#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero
+#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero
+
+#define RETVAL_ONEL *(long double *)retval = (long double) 1.0
+#define RETVAL_ONED *(double *)retval = 1.0
+#define RETVAL_ONEF *(float *)retval = 1.0f
+
+#define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!matherrl(&excl))
+#define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!MATHERR_D(&exc))
+#define NOT_MATHERRF excf.arg1=*(float *)arg1;excf.arg2=*(float *)arg2;excf.retval=*(float *)retval;if(!MATHERR_F(&excf))
+
+#define ifSVID if(_LIB_VERSION==_SVID_)
+
+#define NAMEL excl.name
+#define NAMED exc.name
+#define NAMEF excf.name
+
+//
+// These should work OK for MS because they are ints -
+// leading underbars are not necessary.
+//
+
+#define DOMAIN 1
+#define SING 2
+#define OVERFLOW 3
+#define UNDERFLOW 4
+#define TLOSS 5
+#define PLOSS 6
+
+#define SINGL excl.type = SING
+#define DOMAINL excl.type = DOMAIN
+#define OVERFLOWL excl.type = OVERFLOW
+#define UNDERFLOWL excl.type = UNDERFLOW
+#define TLOSSL excl.type = TLOSS
+#define SINGD exc.type = SING
+#define DOMAIND exc.type = DOMAIN
+#define OVERFLOWD exc.type = OVERFLOW
+#define UNDERFLOWD exc.type = UNDERFLOW
+#define TLOSSD exc.type = TLOSS
+#define SINGF excf.type = SING
+#define DOMAINF excf.type = DOMAIN
+#define OVERFLOWF excf.type = OVERFLOW
+#define UNDERFLOWF excf.type = UNDERFLOW
+#define TLOSSF excf.type = TLOSS
+
+#define INPUT_XL (excl.arg1=*(long double*)arg1)
+#define INPUT_XD (exc.arg1=*(double*)arg1)
+#define INPUT_XF (excf.arg1=*(float*)arg1)
+#define INPUT_YL (excl.arg1=*(long double*)arg2)
+#define INPUT_YD (exc.arg1=*(double*)arg2)
+#define INPUT_YF (excf.arg1=*(float*)arg2)
+#define INPUT_RESL (*(long double *)retval)
+#define INPUT_RESD (*(double *)retval)
+#define INPUT_RESF (*(float *)retval)
+
+#define WRITEL_LOG_ZERO fputs("logl: SING error\n",stderr)
+#define WRITED_LOG_ZERO fputs("log: SING error\n",stderr)
+#define WRITEF_LOG_ZERO fputs("logf: SING error\n",stderr)
+#define WRITEL_LOG_NEGATIVE fputs("logl: DOMAIN error\n",stderr)
+#define WRITED_LOG_NEGATIVE fputs("log: DOMAIN error\n",stderr)
+#define WRITEF_LOG_NEGATIVE fputs("logf: DOMAIN error\n",stderr)
+#define WRITEL_Y0_ZERO fputs("y0l: DOMAIN error\n",stderr)
+#define WRITED_Y0_ZERO fputs("y0: DOMAIN error\n",stderr)
+#define WRITEF_Y0_ZERO fputs("y0f: DOMAIN error\n",stderr)
+#define WRITEL_Y0_NEGATIVE fputs("y0l: DOMAIN error\n",stderr)
+#define WRITED_Y0_NEGATIVE fputs("y0: DOMAIN error\n",stderr)
+#define WRITEF_Y0_NEGATIVE fputs("y0f: DOMAIN error\n",stderr)
+#define WRITEL_Y1_ZERO fputs("y1l: DOMAIN error\n",stderr)
+#define WRITED_Y1_ZERO fputs("y1: DOMAIN error\n",stderr)
+#define WRITEF_Y1_ZERO fputs("y1f: DOMAIN error\n",stderr)
+#define WRITEL_Y1_NEGATIVE fputs("y1l: DOMAIN error\n",stderr)
+#define WRITED_Y1_NEGATIUE fputs("y1: DOMAIN error\n",stderr)
+#define WRITEF_Y1_NEGATIVE fputs("y1f: DOMAIN error\n",stderr)
+#define WRITEL_YN_ZERO fputs("ynl: DOMAIN error\n",stderr)
+#define WRITED_YN_ZERO fputs("yn: DOMAIN error\n",stderr)
+#define WRITEF_YN_ZERO fputs("ynf: DOMAIN error\n",stderr)
+#define WRITEL_YN_NEGATIVE fputs("ynl: DOMAIN error\n",stderr)
+#define WRITED_YN_NEGATIVE fputs("yn: DOMAIN error\n",stderr)
+#define WRITEF_YN_NEGATIVE fputs("ynf: DOMAIN error\n",stderr)
+#define WRITEL_LOG1P_ZERO fputs("log1pl: SING error\n",stderr)
+#define WRITED_LOG1P_ZERO fputs("log1p: SING error\n",stderr)
+#define WRITEF_LOG1P_ZERO fputs("log1pf: SING error\n",stderr)
+#define WRITEL_LOG1P_NEGATIVE fputs("log1pl: DOMAIN error\n",stderr)
+#define WRITED_LOG1P_NEGATIVE fputs("log1p: DOMAIN error\n",stderr)
+#define WRITEF_LOG1P_NEGATIVE fputs("log1pf: DOMAIN error\n",stderr)
+#define WRITEL_LOG10_ZERO fputs("log10l: SING error\n",stderr)
+#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr)
+#define WRITEF_LOG10_ZERO fputs("log10f: SING error\n",stderr)
+#define WRITEL_LOG10_NEGATIVE fputs("log10l: DOMAIN error\n",stderr)
+#define WRITED_LOG10_NEGATIVE fputs("log10: DOMAIN error\n",stderr)
+#define WRITEF_LOG10_NEGATIVE fputs("log10f: DOMAIN error\n",stderr)
+#define WRITEL_POW_ZERO_TO_ZERO fputs("powl(0,0): DOMAIN error\n",stderr)
+#define WRITED_POW_ZERO_TO_ZERO fputs("pow(0,0): DOMAIN error\n",stderr)
+#define WRITEF_POW_ZERO_TO_ZERO fputs("powf(0,0): DOMAIN error\n",stderr)
+#define WRITEL_POW_ZERO_TO_NEGATIVE fputs("powl(0,negative): DOMAIN error\n",stderr)
+#define WRITED_POW_ZERO_TO_NEGATIVE fputs("pow(0,negative): DOMAIN error\n",stderr)
+#define WRITEF_POW_ZERO_TO_NEGATIVE fputs("powf(0,negative): DOMAIN error\n",stderr)
+#define WRITEL_POW_NEG_TO_NON_INTEGER fputs("powl(negative,non-integer): DOMAIN error\n",stderr)
+#define WRITED_POW_NEG_TO_NON_INTEGER fputs("pow(negative,non-integer): DOMAIN error\n",stderr)
+#define WRITEF_POW_NEG_TO_NON_INTEGER fputs("powf(negative,non-integer): DOMAIN error\n",stderr)
+#define WRITEL_ATAN2_ZERO_BY_ZERO fputs("atan2l: DOMAIN error\n",stderr)
+#define WRITED_ATAN2_ZERO_BY_ZERO fputs("atan2: DOMAIN error\n",stderr)
+#define WRITEF_ATAN2_ZERO_BY_ZERO fputs("atan2f: DOMAIN error\n",stderr)
+#define WRITEL_SQRT fputs("sqrtl: DOMAIN error\n",stderr)
+#define WRITED_SQRT fputs("sqrt: DOMAIN error\n",stderr)
+#define WRITEF_SQRT fputs("sqrtf: DOMAIN error\n",stderr)
+#define WRITEL_FMOD fputs("fmodl: DOMAIN error\n",stderr)
+#define WRITED_FMOD fputs("fmod: DOMAIN error\n",stderr)
+#define WRITEF_FMOD fputs("fmodf: DOMAIN error\n",stderr)
+#define WRITEL_REM fputs("remainderl: DOMAIN error\n",stderr)
+#define WRITED_REM fputs("remainder: DOMAIN error\n",stderr)
+#define WRITEF_REM fputs("remainderf: DOMAIN error\n",stderr)
+#define WRITEL_ACOS fputs("acosl: DOMAIN error\n",stderr)
+#define WRITED_ACOS fputs("acos: DOMAIN error\n",stderr)
+#define WRITEF_ACOS fputs("acosf: DOMAIN error\n",stderr)
+#define WRITEL_ASIN fputs("asinl: DOMAIN error\n",stderr)
+#define WRITED_ASIN fputs("asin: DOMAIN error\n",stderr)
+#define WRITEF_ASIN fputs("asinf: DOMAIN error\n",stderr)
+#define WRITEL_ACOSH fputs("acoshl: DOMAIN error\n",stderr)
+#define WRITED_ACOSH fputs("acosh: DOMAIN error\n",stderr)
+#define WRITEF_ACOSH fputs("acoshf: DOMAIN error\n",stderr)
+#define WRITEL_ATANH_GT_ONE fputs("atanhl: DOMAIN error\n",stderr)
+#define WRITED_ATANH_GT_ONE fputs("atanh: DOMAIN error\n",stderr)
+#define WRITEF_ATANH_GT_ONE fputs("atanhf: DOMAIN error\n",stderr)
+#define WRITEL_ATANH_EQ_ONE fputs("atanhl: SING error\n",stderr)
+#define WRITED_ATANH_EQ_ONE fputs("atanh: SING error\n",stderr)
+#define WRITEF_ATANH_EQ_ONE fputs("atanhf: SING error\n",stderr)
+#define WRITEL_LGAMMA_NEGATIVE fputs("lgammal: SING error\n",stderr)
+#define WRITED_LGAMMA_NEGATIVE fputs("lgamma: SING error\n",stderr)
+#define WRITEF_LGAMMA_NEGATIVE fputs("lgammaf: SING error\n",stderr)
+#define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr)
+#define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr)
+#define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr)
+#define WRITEL_J0_TLOSS fputs("j0l: TLOSS error\n",stderr)
+#define WRITEL_Y0_TLOSS fputs("y0l: TLOSS error\n",stderr)
+#define WRITEL_J1_TLOSS fputs("j1l: TLOSS error\n",stderr)
+#define WRITEL_Y1_TLOSS fputs("y1l: TLOSS error\n",stderr)
+#define WRITEL_JN_TLOSS fputs("jnl: TLOSS error\n",stderr)
+#define WRITEL_YN_TLOSS fputs("ynl: TLOSS error\n",stderr)
+#define WRITED_J0_TLOSS fputs("j0: TLOSS error\n",stderr)
+#define WRITED_Y0_TLOSS fputs("y0: TLOSS error\n",stderr)
+#define WRITED_J1_TLOSS fputs("j1: TLOSS error\n",stderr)
+#define WRITED_Y1_TLOSS fputs("y1: TLOSS error\n",stderr)
+#define WRITED_JN_TLOSS fputs("jn: TLOSS error\n",stderr)
+#define WRITED_YN_TLOSS fputs("yn: TLOSS error\n",stderr)
+#define WRITEF_J0_TLOSS fputs("j0f: TLOSS error\n",stderr)
+#define WRITEF_Y0_TLOSS fputs("y0f: TLOSS error\n",stderr)
+#define WRITEF_J1_TLOSS fputs("j1f: TLOSS error\n",stderr)
+#define WRITEF_Y1_TLOSS fputs("y1f: TLOSS error\n",stderr)
+#define WRITEF_JN_TLOSS fputs("jnf: TLOSS error\n",stderr)
+#define WRITEF_YN_TLOSS fputs("ynf: TLOSS error\n",stderr)
+
+/***********************/
+/* IEEE Path */
+/***********************/
+if(_LIB_VERSION==_IEEE_) return;
+
+/***********************/
+/* C9X Path */
+/***********************/
+else if(_LIB_VERSION==_ISOC_)
+{
+ switch(input_tag)
+ {
+ case logl_zero:
+ case log_zero:
+ case logf_zero:
+ case log10l_zero:
+ case log10_zero:
+ case log10f_zero:
+ case log2l_zero:
+ case log2_zero:
+ case log2f_zero:
+ case log1pl_zero:
+ case log1p_zero:
+ case log1pf_zero:
+ case powl_overflow:
+ case pow_overflow:
+ case powf_overflow:
+ case powl_underflow:
+ case pow_underflow:
+ case powf_underflow:
+ case expl_overflow:
+ case exp_overflow:
+ case expf_overflow:
+ case expl_underflow:
+ case exp_underflow:
+ case expf_underflow:
+ case exp2l_overflow:
+ case exp2_overflow:
+ case exp2f_overflow:
+ case exp2l_underflow:
+ case exp2_underflow:
+ case exp2f_underflow:
+ case exp10l_overflow:
+ case exp10_overflow:
+ case exp10f_overflow:
+ case expm1l_overflow:
+ case expm1_overflow:
+ case expm1f_overflow:
+ case hypotl_overflow:
+ case hypot_overflow:
+ case hypotf_overflow:
+ case sinhl_overflow:
+ case sinh_overflow:
+ case sinhf_overflow:
+ case atanhl_eq_one:
+ case atanh_eq_one:
+ case atanhf_eq_one:
+ case scalbl_overflow:
+ case scalb_overflow:
+ case scalbf_overflow:
+ case scalbl_underflow:
+ case scalb_underflow:
+ case scalbf_underflow:
+ case coshl_overflow:
+ case cosh_overflow:
+ case coshf_overflow:
+ case nextafterl_overflow:
+ case nextafter_overflow:
+ case nextafterf_overflow:
+ case scalbnl_overflow:
+ case scalbn_overflow:
+ case scalbnf_overflow:
+ case scalbnl_underflow:
+ case scalbn_underflow:
+ case scalbnf_underflow:
+ case ldexpl_overflow:
+ case ldexp_overflow:
+ case ldexpf_overflow:
+ case ldexpl_underflow:
+ case ldexp_underflow:
+ case ldexpf_underflow:
+ case lgammal_overflow:
+ case lgamma_overflow:
+ case lgammaf_overflow:
+ case lgammal_negative:
+ case lgamma_negative:
+ case lgammaf_negative:
+ case gammal_overflow:
+ case gamma_overflow:
+ case gammaf_overflow:
+ case gammal_negative:
+ case gamma_negative:
+ case gammaf_negative:
+ case ilogbl_zero:
+ case ilogb_zero:
+ case ilogbf_zero:
+ {
+ ERRNO_RANGE; break;
+ }
+ case logl_negative:
+ case log_negative:
+ case logf_negative:
+ case log10l_negative:
+ case log10_negative:
+ case log10f_negative:
+ case log2l_negative:
+ case log2_negative:
+ case log2f_negative:
+ case log1pl_negative:
+ case log1p_negative:
+ case log1pf_negative:
+ case sqrtl_negative:
+ case sqrt_negative:
+ case sqrtf_negative:
+ case atan2l_zero:
+ case atan2_zero:
+ case atan2f_zero:
+ case powl_zero_to_negative:
+ case powl_neg_to_non_integer:
+ case pow_zero_to_negative:
+ case pow_neg_to_non_integer:
+ case powf_zero_to_negative:
+ case powf_neg_to_non_integer:
+ case fmodl_by_zero:
+ case fmod_by_zero:
+ case fmodf_by_zero:
+ case atanhl_gt_one:
+ case atanh_gt_one:
+ case atanhf_gt_one:
+ case acosl_gt_one:
+ case acos_gt_one:
+ case acosf_gt_one:
+ case asinl_gt_one:
+ case asin_gt_one:
+ case asinf_gt_one:
+ case logbl_zero:
+ case logb_zero:
+ case logbf_zero:
+ case acoshl_lt_one:
+ case acosh_lt_one:
+ case acoshf_lt_one:
+ case y0l_zero:
+ case y0_zero:
+ case y0f_zero:
+ case y1l_zero:
+ case y1_zero:
+ case y1f_zero:
+ case ynl_zero:
+ case yn_zero:
+ case ynf_zero:
+ case y0l_negative:
+ case y0_negative:
+ case y0f_negative:
+ case y1l_negative:
+ case y1_negative:
+ case y1f_negative:
+ case ynl_negative:
+ case yn_negative:
+ case ynf_negative:
+ {
+ ERRNO_DOMAIN; break;
+ }
+ default:
+ abort();
+ }
+ return;
+}
+
+/***********************/
+/* _POSIX_ Path */
+/***********************/
+
+else if(_LIB_VERSION==_POSIX_)
+{
+switch(input_tag)
+ {
+ case gammal_overflow:
+ case lgammal_overflow:
+ {
+ RETVAL_HUGE_VALL; ERRNO_RANGE; break;
+ }
+ case gamma_overflow:
+ case lgamma_overflow:
+ {
+ RETVAL_HUGE_VALD; ERRNO_RANGE; break;
+ }
+ case gammaf_overflow:
+ case lgammaf_overflow:
+ {
+ RETVAL_HUGE_VALF; ERRNO_RANGE; break;
+ }
+ case gammal_negative:
+ case gamma_negative:
+ case gammaf_negative:
+ case lgammal_negative:
+ case lgamma_negative:
+ case lgammaf_negative:
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case ldexpl_overflow:
+ case ldexpl_underflow:
+ case ldexp_overflow:
+ case ldexp_underflow:
+ case ldexpf_overflow:
+ case ldexpf_underflow:
+ case scalbnl_overflow:
+ case scalbnl_underflow:
+ case scalbn_overflow:
+ case scalbn_underflow:
+ case scalbnf_overflow:
+ case scalbnf_underflow:
+ {
+ ERRNO_RANGE; break;
+ }
+ case atanhl_gt_one:
+ case atanhl_eq_one:
+ /* atanhl(|x| >= 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case atanh_gt_one:
+ case atanh_eq_one:
+ /* atanh(|x| >= 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case atanhf_gt_one:
+ case atanhf_eq_one:
+ /* atanhf(|x| >= 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case sqrtl_negative:
+ /* sqrtl(x < 0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case sqrt_negative:
+ /* sqrt(x < 0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case sqrtf_negative:
+ /* sqrtf(x < 0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case y0l_zero:
+ case y1l_zero:
+ case ynl_zero:
+ /* y0l(0) */
+ /* y1l(0) */
+ /* ynl(0) */
+ {
+ RETVAL_NEG_HUGE_VALL; ERRNO_DOMAIN; break;
+ }
+ case y0_zero:
+ case y1_zero:
+ case yn_zero:
+ /* y0(0) */
+ /* y1(0) */
+ /* yn(0) */
+ {
+ RETVAL_NEG_HUGE_VALD; ERRNO_DOMAIN; break;
+ }
+ case y0f_zero:
+ case y1f_zero:
+ case ynf_zero:
+ /* y0f(0) */
+ /* y1f(0) */
+ /* ynf(0) */
+ {
+ RETVAL_NEG_HUGE_VALF; ERRNO_DOMAIN; break;
+ }
+ case y0l_negative:
+ case y1l_negative:
+ case ynl_negative:
+ /* y0l(x < 0) */
+ /* y1l(x < 0) */
+ /* ynl(x < 0) */
+ {
+ RETVAL_NEG_HUGE_VALL; ERRNO_DOMAIN; break;
+ }
+ case y0_negative:
+ case y1_negative:
+ case yn_negative:
+ /* y0(x < 0) */
+ /* y1(x < 0) */
+ /* yn(x < 0) */
+ {
+ RETVAL_NEG_HUGE_VALD; ERRNO_DOMAIN; break;
+ }
+ case y0f_negative:
+ case y1f_negative:
+ case ynf_negative:
+ /* y0f(x < 0) */
+ /* y1f(x < 0) */
+ /* ynf(x < 0) */
+ {
+ RETVAL_NEG_HUGE_VALF; ERRNO_DOMAIN; break;
+ }
+ case logl_zero:
+ case log1pl_zero:
+ case log10l_zero:
+ /* logl(0) */
+ /* log1pl(0) */
+ /* log10l(0) */
+ {
+ RETVAL_NEG_HUGE_VALL; ERRNO_RANGE; break;
+ }
+ case log_zero:
+ case log1p_zero:
+ case log10_zero:
+ case log2l_zero:
+ /* log(0) */
+ /* log1p(0) */
+ /* log10(0) */
+ {
+ RETVAL_NEG_HUGE_VALD; ERRNO_RANGE; break;
+ }
+ case logf_zero:
+ case log1pf_zero:
+ case log10f_zero:
+ /* logf(0) */
+ /* log1pf(0) */
+ /* log10f(0) */
+ {
+ RETVAL_NEG_HUGE_VALF; ERRNO_RANGE; break;
+ }
+ case logl_negative:
+ case log1pl_negative:
+ case log10l_negative:
+ case log2l_negative:
+ /* logl(x < 0) */
+ /* log1pl(x < 0) */
+ /* log10l(x < 0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case log_negative:
+ case log1p_negative:
+ case log10_negative:
+ case log2_negative:
+ /* log(x < 0) */
+ /* log1p(x < 0) */
+ /* log10(x < 0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case logf_negative:
+ case log1pf_negative:
+ case log10f_negative:
+ case log2f_negative:
+ /* logf(x < 0) */
+ /* log1pf(x < 0) */
+ /* log10f(x < 0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case expl_overflow:
+ /* expl overflow */
+ {
+ RETVAL_HUGE_VALL; ERRNO_RANGE; break;
+ }
+ case exp_overflow:
+ /* exp overflow */
+ {
+ RETVAL_HUGE_VALD; ERRNO_RANGE; break;
+ }
+ case expf_overflow:
+ /* expf overflow */
+ {
+ RETVAL_HUGE_VALF; ERRNO_RANGE; break;
+ }
+ case expl_underflow:
+ /* expl underflow */
+ {
+ RETVAL_ZEROL; ERRNO_RANGE; break;
+ }
+ case exp_underflow:
+ /* exp underflow */
+ {
+ RETVAL_ZEROD; ERRNO_RANGE; break;
+ }
+ case expf_underflow:
+ /* expf underflow */
+ {
+ RETVAL_ZEROF; ERRNO_RANGE; break;
+ }
+ case j0l_gt_loss:
+ case y0l_gt_loss:
+ case j1l_gt_loss:
+ case y1l_gt_loss:
+ case jnl_gt_loss:
+ case ynl_gt_loss:
+ /* jn and yn double-extended > XLOSS */
+ {
+ RETVAL_ZEROL; ERRNO_RANGE; break;
+ }
+ case j0_gt_loss:
+ case y0_gt_loss:
+ case j1_gt_loss:
+ case y1_gt_loss:
+ case jn_gt_loss:
+ case yn_gt_loss:
+ /* jn and yn double > XLOSS */
+ {
+ RETVAL_ZEROD; ERRNO_RANGE; break;
+ }
+ case j0f_gt_loss:
+ case y0f_gt_loss:
+ case j1f_gt_loss:
+ case y1f_gt_loss:
+ case jnf_gt_loss:
+ case ynf_gt_loss:
+ /* jn and yn float > XLOSS */
+ {
+ RETVAL_ZEROF; ERRNO_RANGE; break;
+ }
+ case powl_zero_to_zero:
+ /* powl 0**0 */
+ {
+ break;
+ }
+ case pow_zero_to_zero:
+ /* pow 0**0 */
+ {
+ break;
+ }
+ case powf_zero_to_zero:
+ /* powf 0**0 */
+ {
+ break;
+ }
+ case powl_overflow:
+ /* powl(x,y) overflow */
+ {
+ if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL;
+ else RETVAL_HUGE_VALL;
+ ERRNO_RANGE; break;
+ }
+ case pow_overflow:
+ /* pow(x,y) overflow */
+ {
+ if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD;
+ else RETVAL_HUGE_VALD;
+ ERRNO_RANGE; break;
+ }
+ case powf_overflow:
+ /* powf(x,y) overflow */
+ {
+ if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF;
+ else RETVAL_HUGE_VALF;
+ ERRNO_RANGE; break;
+ }
+ case powl_underflow:
+ /* powl(x,y) underflow */
+ {
+ RETVAL_ZEROL; ERRNO_RANGE; break;
+ }
+ case pow_underflow:
+ /* pow(x,y) underflow */
+ {
+ RETVAL_ZEROD; ERRNO_RANGE; break;
+ }
+ case powf_underflow:
+ /* powf(x,y) underflow */
+ {
+ RETVAL_ZEROF; ERRNO_RANGE; break;
+ }
+ case powl_zero_to_negative:
+ /* 0**neg */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case pow_zero_to_negative:
+ /* 0**neg */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case powf_zero_to_negative:
+ /* 0**neg */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case powl_neg_to_non_integer:
+ /* neg**non_integral */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case pow_neg_to_non_integer:
+ /* neg**non_integral */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case powf_neg_to_non_integer:
+ /* neg**non-integral */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case powl_nan_to_zero:
+ /* powl(NaN,0.0) */
+ /* Special Error */
+ {
+ break;
+ }
+ case pow_nan_to_zero:
+ /* pow(NaN,0.0) */
+ {
+ break;
+ }
+ case powf_nan_to_zero:
+ /* powf(NaN,0.0) */
+ {
+ break;
+ }
+ case atan2l_zero:
+ /* atan2l(0,0) */
+ {
+ /* XXX arg1 and arg2 are switched!!!! */
+ if (signbit (*(long double *) arg1))
+ /* y == -0 */
+ *(long double *) retval = copysignl (M_PIl, *(long double *) arg2);
+ else
+ *(long double *) retval = *(long double *) arg2;
+ ERRNO_DOMAIN; break;
+ }
+ case atan2_zero:
+ /* atan2(0,0) */
+ {
+ /* XXX arg1 and arg2 are switched!!!! */
+ if (signbit (*(double *) arg1))
+ /* y == -0 */
+ *(double *) retval = copysign (M_PI, *(double *) arg2);
+ else
+ *(double *) retval = *(double *) arg2;
+ ERRNO_DOMAIN; break;
+ }
+ case
+ atan2f_zero:
+ /* atan2f(0,0) */
+ {
+ if (signbit (*(float *) arg2))
+ /* y == -0 */
+ *(float *) retval = copysignf (M_PI, *(float *) arg1);
+ else
+ *(float *) retval = *(float *) arg1;
+ ERRNO_DOMAIN; break;
+ }
+ case expm1l_overflow:
+ /* expm1l overflow */
+ {
+ ERRNO_RANGE; break;
+ }
+ case expm1_overflow:
+ /* expm1 overflow */
+ {
+ ERRNO_RANGE; break;
+ }
+ case expm1f_overflow:
+ /* expm1f overflow */
+ {
+ ERRNO_RANGE; break;
+ }
+ case expm1l_underflow:
+ /* expm1l underflow */
+ {
+ ERRNO_RANGE; break;
+ }
+ case expm1_underflow:
+ /* expm1 underflow */
+ {
+ ERRNO_RANGE; break;
+ }
+ case expm1f_underflow:
+ /* expm1f underflow */
+ {
+ ERRNO_RANGE; break;
+ }
+ case hypotl_overflow:
+ /* hypotl overflow */
+ {
+ RETVAL_HUGE_VALL; ERRNO_RANGE; break;
+ }
+ case hypot_overflow:
+ /* hypot overflow */
+ {
+ RETVAL_HUGE_VALD; ERRNO_RANGE; break;
+ }
+ case hypotf_overflow:
+ /* hypotf overflow */
+ {
+ RETVAL_HUGE_VALF; ERRNO_RANGE; break;
+ }
+ case scalbl_underflow:
+ /* scalbl underflow */
+ {
+ if (INPUT_XL < 0) RETVAL_NEG_ZEROL;
+ else RETVAL_ZEROL;
+ ERRNO_RANGE; break;
+ }
+ case scalb_underflow:
+ /* scalb underflow */
+ {
+ if (INPUT_XD < 0) RETVAL_NEG_ZEROD;
+ else RETVAL_ZEROD;
+ ERRNO_RANGE; break;
+ }
+ case scalbf_underflow:
+ /* scalbf underflow */
+ {
+ if (INPUT_XF < 0) RETVAL_NEG_ZEROF;
+ else RETVAL_ZEROF;
+ ERRNO_RANGE; break;
+ }
+ case scalbl_overflow:
+ /* scalbl overflow */
+ {
+ if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ else RETVAL_HUGE_VALL;
+ ERRNO_RANGE; break;
+ }
+ case scalb_overflow:
+ /* scalb overflow */
+ {
+ if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ else RETVAL_HUGE_VALD;
+ ERRNO_RANGE; break;
+ }
+ case scalbf_overflow:
+ /* scalbf overflow */
+ {
+ if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ else RETVAL_HUGE_VALF;
+ ERRNO_RANGE; break;
+ }
+ case acoshl_lt_one:
+ /* acoshl(x < 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case acosh_lt_one:
+ /* acosh(x < 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case acoshf_lt_one:
+ /* acoshf(x < 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case acosl_gt_one:
+ /* acosl(x > 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case acos_gt_one:
+ /* acos(x > 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case acosf_gt_one:
+ /* acosf(x > 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case asinl_gt_one:
+ /* asinl(x > 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case asin_gt_one:
+ /* asin(x > 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case asinf_gt_one:
+ /* asinf(x > 1) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case remainderl_by_zero:
+ case fmodl_by_zero:
+ /* fmodl(x,0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case remainder_by_zero:
+ case fmod_by_zero:
+ /* fmod(x,0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case remainderf_by_zero:
+ case fmodf_by_zero:
+ /* fmodf(x,0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case coshl_overflow:
+ /* coshl overflows */
+ {
+ RETVAL_HUGE_VALL; ERRNO_RANGE; break;
+ }
+ case cosh_overflow:
+ /* cosh overflows */
+ {
+ RETVAL_HUGE_VALD; ERRNO_RANGE; break;
+ }
+ case coshf_overflow:
+ /* coshf overflows */
+ {
+ RETVAL_HUGE_VALF; ERRNO_RANGE; break;
+ }
+ case sinhl_overflow:
+ /* sinhl overflows */
+ {
+ if (INPUT_XL > 0) RETVAL_HUGE_VALL;
+ else RETVAL_NEG_HUGE_VALL;
+ ERRNO_RANGE; break;
+ }
+ case sinh_overflow:
+ /* sinh overflows */
+ {
+ if (INPUT_XD > 0) RETVAL_HUGE_VALD;
+ else RETVAL_NEG_HUGE_VALD;
+ ERRNO_RANGE; break;
+ }
+ case sinhf_overflow:
+ /* sinhf overflows */
+ {
+ if (INPUT_XF > 0) RETVAL_HUGE_VALF;
+ else RETVAL_NEG_HUGE_VALF;
+ ERRNO_RANGE; break;
+ }
+ case logbl_zero:
+ /* logbl(0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case logb_zero:
+ /* logb(0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case logbf_zero:
+ /* logbf(0) */
+ {
+ ERRNO_DOMAIN; break;
+ }
+ case ilogbl_zero:
+ /* ilogbl(0) */
+ {
+ ERRNO_RANGE; break;
+ }
+ case ilogb_zero:
+ /* ilogb(0) */
+ {
+ ERRNO_RANGE; break;
+ }
+ case ilogbf_zero:
+ /* ilogbf(0) */
+ {
+ ERRNO_RANGE; break;
+ }
+ default:
+ abort();
+}
+return;
+/* _POSIX_ */
+}
+
+/*******************************/
+/* __SVID__ and __XOPEN__ Path */
+/*******************************/
+else
+{
+ switch(input_tag)
+ {
+ case ldexpl_overflow:
+ case ldexpl_underflow:
+ case ldexp_overflow:
+ case ldexp_underflow:
+ case ldexpf_overflow:
+ case ldexpf_underflow:
+ case scalbnl_overflow:
+ case scalbnl_underflow:
+ case scalbn_overflow:
+ case scalbn_underflow:
+ case scalbnf_overflow:
+ case scalbnf_underflow:
+ {
+ ERRNO_RANGE; break;
+ }
+ case sqrtl_negative:
+ /* sqrtl(x < 0) */
+ {
+ DOMAINL; NAMEL = (char *) "sqrtl";
+ ifSVID
+ {
+ RETVAL_ZEROL;
+ NOT_MATHERRL
+ {
+ WRITEL_SQRT;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ { /* NaN already computed */
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case sqrt_negative:
+ /* sqrt(x < 0) */
+ {
+ DOMAIND; NAMED = (char *) "sqrt";
+ ifSVID
+ {
+
+ RETVAL_ZEROD;
+ NOT_MATHERRD
+ {
+ WRITED_SQRT;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ { /* NaN already computed */
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case sqrtf_negative:
+ /* sqrtf(x < 0) */
+ {
+ DOMAINF; NAMEF = (char *) "sqrtf";
+ ifSVID
+ {
+ RETVAL_ZEROF;
+ NOT_MATHERRF
+ {
+ WRITEF_SQRT;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case logl_zero:
+ case log2l_zero:
+ /* logl(0) */
+ {
+ SINGL; NAMEL = (char *) "logl";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log_zero:
+ case log2_zero:
+ /* log(0) */
+ {
+ SINGD; NAMED = (char *) "log";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case logf_zero:
+ case log2f_zero:
+ /* logf(0) */
+ {
+ SINGF; NAMEF = (char *) "logf";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+
+ case logl_negative:
+ case log2l_negative:
+ /* logl(x < 0) */
+ {
+ DOMAINL; NAMEL = (char *) "logl";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log_negative:
+ case log2_negative:
+ /* log(x < 0) */
+ {
+ DOMAIND; NAMED = (char *) "log";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case logf_negative:
+ case log2f_negative:
+ /* logf(x < 0) */
+ {
+ DOMAINF; NAMEF = (char *) "logf";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF{ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log1pl_zero:
+ /* log1pl(-1) */
+ {
+ SINGL; NAMEL = (char *) "log1pl";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG1P_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log1p_zero:
+ /* log1p(-1) */
+ {
+ SINGD; NAMED = (char *) "log1p";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG1P_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case log1pf_zero:
+ /* log1pf(-1) */
+ {
+ SINGF; NAMEF = (char *) "log1pf";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG1P_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {}ERRNO_DOMAIN;
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log1pl_negative:
+ /* log1pl(x < -1) */
+ {
+ DOMAINL; NAMEL = (char *) "log1pl";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG1P_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log1p_negative:
+ /* log1p(x < -1) */
+ {
+ DOMAIND; NAMED = (char *) "log1p";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG1P_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case log1pf_negative:
+ /* log1pf(x < -1) */
+ {
+ DOMAINF; NAMEF = (char *) "log1pf";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG1P_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log10l_zero:
+ /* log10l(0) */
+ {
+ SINGL; NAMEL = (char *) "log10l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG10_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log10_zero:
+ /* log10(0) */
+ {
+ SINGD; NAMED = (char *) "log10";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG10_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case log10f_zero:
+ /* log10f(0) */
+ {
+ SINGF; NAMEF = (char *) "log10f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG10_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log10l_negative:
+ /* log10l(x < 0) */
+ {
+ DOMAINL; NAMEL = (char *) "log10l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG10_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case log10_negative:
+ /* log10(x < 0) */
+ {
+ DOMAIND; NAMED = (char *) "log10";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LOG10_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case log10f_negative:
+ /* log10f(x < 0) */
+ {
+ DOMAINF; NAMEF = (char *) "log10f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LOG10_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case expl_overflow:
+ /* expl overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "expl";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case exp_overflow:
+ /* exp overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "exp";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case expf_overflow:
+ /* expf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "expf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case expl_underflow:
+ /* expl underflow */
+ {
+ UNDERFLOWL; NAMEL = (char *) "expl"; RETVAL_ZEROL;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case exp_underflow:
+ /* exp underflow */
+ {
+ UNDERFLOWD; NAMED = (char *) "exp"; RETVAL_ZEROD;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case expf_underflow:
+ /* expf underflow */
+ {
+ UNDERFLOWF; NAMEF = (char *) "expf"; RETVAL_ZEROF;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case powl_zero_to_zero:
+ /* powl 0**0 */
+ {
+ DOMAINL; NAMEL = (char *) "powl";
+ ifSVID
+ {
+ RETVAL_ZEROL;
+ NOT_MATHERRL
+ {
+ WRITEL_POW_ZERO_TO_ZERO;
+ ERRNO_RANGE;
+ }
+ *(long double *)retval = excl.retval;
+ }
+ else RETVAL_ONEL;
+ break;
+ }
+ case pow_zero_to_zero:
+ /* pow 0**0 */
+ {
+ DOMAIND; NAMED = (char *) "pow";
+ ifSVID
+ {
+ RETVAL_ZEROD;
+ NOT_MATHERRD
+ {
+ WRITED_POW_ZERO_TO_ZERO;
+ ERRNO_RANGE;
+ }
+ *(double *)retval = exc.retval;
+ }
+ else RETVAL_ONED;
+ break;
+ }
+ case powf_zero_to_zero:
+ /* powf 0**0 */
+ {
+ DOMAINF; NAMEF = (char *) "powf";
+ ifSVID
+ {
+ RETVAL_ZEROF;
+ NOT_MATHERRF
+ {
+ WRITEF_POW_ZERO_TO_ZERO;
+ ERRNO_RANGE;
+ }
+ *(float *)retval = excf.retval;
+ }
+ else RETVAL_ONEF;
+ break;
+ }
+ case powl_overflow:
+ /* powl(x,y) overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "powl";
+ ifSVID
+ {
+ if (INPUT_XL < 0) RETVAL_NEG_HUGEL;
+ else RETVAL_HUGEL;
+ }
+ else
+ {
+ if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ else RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case pow_overflow:
+ /* pow(x,y) overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "pow";
+ ifSVID
+ {
+ if (INPUT_XD < 0) RETVAL_NEG_HUGED;
+ else RETVAL_HUGED;
+ }
+ else
+ {
+ if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ else RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case powf_overflow:
+ /* powf(x,y) overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "powf";
+ ifSVID
+ {
+ if (INPUT_XF < 0) RETVAL_NEG_HUGEF;
+ else RETVAL_HUGEF;
+ }
+ else
+ {
+ if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ else RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case powl_underflow:
+ /* powl(x,y) underflow */
+ {
+ UNDERFLOWL; NAMEL = (char *) "powl"; RETVAL_ZEROL;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case pow_underflow:
+ /* pow(x,y) underflow */
+ {
+ UNDERFLOWD; NAMED = (char *) "pow"; RETVAL_ZEROD;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case powf_underflow:
+ /* powf(x,y) underflow */
+ {
+ UNDERFLOWF; NAMEF = (char *) "powf"; RETVAL_ZEROF;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case powl_zero_to_negative:
+ /* 0 to neg */
+ {
+ DOMAINL; NAMEL = (char *) "powl";
+ ifSVID
+ {
+ RETVAL_ZEROL;
+ NOT_MATHERRL
+ {
+ WRITEL_POW_ZERO_TO_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case pow_zero_to_negative:
+ /* 0**neg */
+ {
+ DOMAIND; NAMED = (char *) "pow";
+ ifSVID
+ {
+ RETVAL_ZEROD;
+ NOT_MATHERRD
+ {
+ WRITED_POW_ZERO_TO_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case powf_zero_to_negative:
+ /* 0**neg */
+ {
+ DOMAINF; NAMEF = (char *) "powf";
+ RETVAL_NEG_HUGE_VALF;
+ ifSVID
+ {
+ RETVAL_ZEROF;
+ NOT_MATHERRF
+ {
+ WRITEF_POW_ZERO_TO_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case powl_neg_to_non_integer:
+ /* neg**non_integral */
+ {
+ DOMAINL; NAMEL = (char *) "powl";
+ ifSVID
+ {
+ RETVAL_ZEROF;
+ NOT_MATHERRL
+ {
+ WRITEL_POW_NEG_TO_NON_INTEGER;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case pow_neg_to_non_integer:
+ /* neg**non_integral */
+ {
+ DOMAIND; NAMED = (char *) "pow";
+ ifSVID
+ {
+ RETVAL_ZEROD;
+ NOT_MATHERRD
+ {
+ WRITED_POW_NEG_TO_NON_INTEGER;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case powf_neg_to_non_integer:
+ /* neg**non-integral */
+ {
+ DOMAINF; NAMEF = (char *) "powf";
+ ifSVID
+ {
+ RETVAL_ZEROF;
+ NOT_MATHERRF
+ {
+ WRITEF_POW_NEG_TO_NON_INTEGER;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case powl_nan_to_zero:
+ /* powl(NaN,0.0) */
+ /* Special Error */
+ {
+ DOMAINL; NAMEL = (char *) "powl"; INPUT_XL; INPUT_YL;
+ excl.retval = *(long double *)arg1;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case pow_nan_to_zero:
+ /* pow(NaN,0.0) */
+ /* Special Error */
+ {
+ DOMAIND; NAMED = (char *) "pow"; INPUT_XD; INPUT_YD;
+ exc.retval = *(double *)arg1;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case powf_nan_to_zero:
+ /* powf(NaN,0.0) */
+ /* Special Error */
+ {
+ DOMAINF; NAMEF = (char *) "powf"; INPUT_XF; INPUT_YF;
+ excf.retval = *(float *)arg1;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case atan2l_zero:
+ /* atan2l(0.0,0.0) */
+ {
+ DOMAINL; NAMEL = (char *) "atan2l";
+ RETVAL_ZEROL;
+ NOT_MATHERRL
+ {
+ ifSVID
+ {
+ WRITEL_ATAN2_ZERO_BY_ZERO;
+ }
+ ERRNO_DOMAIN;
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case atan2_zero:
+ /* atan2(0.0,0.0) */
+ {
+ DOMAIND; NAMED = (char *) "atan2";
+ RETVAL_ZEROD;
+ NOT_MATHERRD
+ {
+ ifSVID
+ {
+ WRITED_ATAN2_ZERO_BY_ZERO;
+ }
+ ERRNO_DOMAIN;
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case atan2f_zero:
+ /* atan2f(0.0,0.0) */
+ {
+ DOMAINF; NAMEF = (char *) "atan2f";
+ RETVAL_ZEROF;
+ NOT_MATHERRF
+ ifSVID
+ {
+ WRITEF_ATAN2_ZERO_BY_ZERO;
+ }
+ ERRNO_DOMAIN;
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case expm1_overflow:
+ /* expm1(finite) overflow */
+ /* Overflow is the only documented */
+ /* special value. */
+ {
+ ERRNO_RANGE;
+ break;
+ }
+ case expm1f_overflow:
+ /* expm1f(finite) overflow */
+ {
+ ERRNO_RANGE;
+ break;
+ }
+ case expm1_underflow:
+ /* expm1(finite) underflow */
+ /* Underflow is not documented */
+ /* special value. */
+ {
+ ERRNO_RANGE;
+ break;
+ }
+ case expm1f_underflow:
+ /* expm1f(finite) underflow */
+ {
+ ERRNO_RANGE;
+ break;
+ }
+ case scalbl_underflow:
+ /* scalbl underflow */
+ {
+ UNDERFLOWL; NAMEL = (char *) "scalbl";
+ if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL;
+ else RETVAL_ZEROL;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excf.retval;
+ break;
+ }
+ case scalb_underflow:
+ /* scalb underflow */
+ {
+ UNDERFLOWD; NAMED = (char *) "scalb";
+ if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD;
+ else RETVAL_ZEROD;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case scalbf_underflow:
+ /* scalbf underflow */
+ {
+ UNDERFLOWF; NAMEF = (char *) "scalbf";
+ if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF;
+ else RETVAL_ZEROF;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case scalbl_overflow:
+ /* scalbl overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "scalbl";
+ if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ else RETVAL_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case scalb_overflow:
+ /* scalb overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "scalb";
+ if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ else RETVAL_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case scalbf_overflow:
+ /* scalbf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "scalbf";
+ if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ else RETVAL_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case hypotl_overflow:
+ /* hypotl overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "hypotl";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case hypot_overflow:
+ /* hypot overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "hypot";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case hypotf_overflow:
+ /* hypotf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "hypotf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case acosl_gt_one:
+ /* acosl(x > 1) */
+ {
+ DOMAINL; NAMEL = (char *) "acosl";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ACOS;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case acos_gt_one:
+ /* acos(x > 1) */
+ {
+ DOMAIND; NAMED = (char *) "acos";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_ACOS;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case acosf_gt_one:
+ /* acosf(x > 1) */
+ {
+ DOMAINF; NAMEF = (char *) "acosf";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ACOS;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case asinl_gt_one:
+ /* asinl(x > 1) */
+ {
+ DOMAINL; NAMEL = (char *) "asinl";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ASIN;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case asin_gt_one:
+ /* asin(x > 1) */
+ {
+ DOMAIND; NAMED = (char *) "asin";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_ASIN;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case asinf_gt_one:
+ /* asinf(x > 1) */
+ {
+ DOMAINF; NAMEF = (char *) "asinf";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ASIN;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case coshl_overflow:
+ /* coshl overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "coshl";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case cosh_overflow:
+ /* cosh overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "cosh";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case coshf_overflow:
+ /* coshf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "coshf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case sinhl_overflow:
+ /* sinhl overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "sinhl";
+ ifSVID
+ {
+ if (INPUT_XL > 0.0) RETVAL_HUGEL;
+ else RETVAL_NEG_HUGEL;
+ }
+ else
+ {
+ if (INPUT_XL > 0.0) RETVAL_HUGE_VALL;
+ else RETVAL_NEG_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case sinh_overflow:
+ /* sinh overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "sinh";
+ ifSVID
+ {
+ if (INPUT_XD > 0.0) RETVAL_HUGED;
+ else RETVAL_NEG_HUGED;
+ }
+ else
+ {
+ if (INPUT_XD > 0.0) RETVAL_HUGE_VALD;
+ else RETVAL_NEG_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case sinhf_overflow:
+ /* sinhf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "sinhf";
+ ifSVID
+ {
+ if( INPUT_XF > 0.0) RETVAL_HUGEF;
+ else RETVAL_NEG_HUGEF;
+ }
+ else
+ {
+ if (INPUT_XF > 0.0) RETVAL_HUGE_VALF;
+ else RETVAL_NEG_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case acoshl_lt_one:
+ /* acoshl(x < 1) */
+ {
+ DOMAINL; NAMEL = (char *) "acoshl";
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ACOSH;
+ ERRNO_DOMAIN;
+ }
+ }
+ else NOT_MATHERRL {ERRNO_DOMAIN;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case acosh_lt_one:
+ /* acosh(x < 1) */
+ {
+ DOMAIND; NAMED = (char *) "acosh";
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITEL_ACOSH;
+ ERRNO_DOMAIN;
+ }
+ }
+ else NOT_MATHERRD {ERRNO_DOMAIN;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case acoshf_lt_one:
+ /* acoshf(x < 1) */
+ {
+ DOMAINF; NAMEF = (char *) "acoshf";
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ACOSH;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ ERRNO_DOMAIN; break;
+ }
+ case atanhl_gt_one:
+ /* atanhl(|x| > 1) */
+ {
+ DOMAINL; NAMEL = (char *) "atanhl";
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ATANH_GT_ONE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ break;
+ }
+ case atanh_gt_one:
+ /* atanh(|x| > 1) */
+ {
+ DOMAIND; NAMED = (char *) "atanh";
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_ATANH_GT_ONE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ break;
+ }
+ case atanhf_gt_one:
+ /* atanhf(|x| > 1) */
+ {
+ DOMAINF; NAMEF = (char *) "atanhf";
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ATANH_GT_ONE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ break;
+ }
+ case atanhl_eq_one:
+ /* atanhl(|x| == 1) */
+ {
+ SINGL; NAMEL = (char *)"atanhl";
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_ATANH_EQ_ONE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ break;
+ }
+ case atanh_eq_one:
+ /* atanh(|x| == 1) */
+ {
+ SINGD; NAMED = (char *) "atanh";
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_ATANH_EQ_ONE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ break;
+ }
+ case atanhf_eq_one:
+ /* atanhf(|x| == 1) */
+ {
+ SINGF; NAMEF = (char *) "atanhf";
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_ATANH_EQ_ONE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ break;
+ }
+ case gammal_overflow:
+ /* gammal overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "gammal";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case gamma_overflow:
+ /* gamma overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "gamma";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case gammaf_overflow:
+ /* gammaf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "gammaf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case lgammal_overflow:
+ /* lgammal overflow */
+ {
+ OVERFLOWL; NAMEL = (char *) "lgammal";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ }
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case lgamma_overflow:
+ /* lgamma overflow */
+ {
+ OVERFLOWD; NAMED = (char *) "lgamma";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ }
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case lgammaf_overflow:
+ /* lgammaf overflow */
+ {
+ OVERFLOWF; NAMEF = (char *) "lgammaf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ }
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case lgammal_negative:
+ /* lgammal -int or 0 */
+ {
+ SINGL; NAMEL = (char *) "lgammal";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case lgamma_negative:
+ /* lgamma -int or 0 */
+ {
+ SINGD; NAMED = (char *) "lgamma";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_LGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case lgammaf_negative:
+ /* lgammaf -int or 0 */
+ {
+ SINGF; NAMEF = (char *) "lgammaf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_LGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case gammal_negative:
+ /* gammal -int or 0 */
+ {
+ SINGL; NAMEL = (char *) "gammal";
+ ifSVID
+ {
+ RETVAL_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case gamma_negative:
+ /* gamma -int or 0 */
+ {
+ SINGD; NAMED = (char *) "gamma";
+ ifSVID
+ {
+ RETVAL_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case gammaf_negative:
+ /* gammaf -int or 0 */
+ {
+ SINGF; NAMEF = (char *) "gammaf";
+ ifSVID
+ {
+ RETVAL_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_GAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case j0l_gt_loss:
+ /* j0l > loss */
+ {
+ TLOSSL; NAMEL = (char *) "j0l";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_J0_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_RANGE;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case j0_gt_loss:
+ /* j0 > loss */
+ {
+ TLOSSD; NAMED = (char *) "j0";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_J0_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_RANGE;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case j0f_gt_loss:
+ /* j0f > loss */
+ {
+ TLOSSF; NAMEF = (char *) "j0f";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_J0_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_RANGE;}
+ }
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case j1l_gt_loss:
+ /* j1l > loss */
+ {
+ TLOSSL; NAMEL = (char *) "j1l";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_J1_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_RANGE;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case j1_gt_loss:
+ /* j1 > loss */
+ {
+ TLOSSD; NAMED = (char *) "j1";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_J1_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_RANGE;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case j1f_gt_loss:
+ /* j1f > loss */
+ {
+ TLOSSF; NAMEF = (char *) "j1f";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_J1_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_RANGE;}
+ }
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case jnl_gt_loss:
+ /* jnl > loss */
+ {
+ TLOSSL; NAMEL = (char *) "jnl";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_JN_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_RANGE;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case jn_gt_loss:
+ /* jn > loss */
+ {
+ TLOSSD; NAMED = (char *) "jn";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_JN_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_RANGE;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case jnf_gt_loss:
+ /* jnf > loss */
+ {
+ TLOSSF; NAMEF = (char *) "jnf";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_JN_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_RANGE;}
+ }
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case y0l_gt_loss:
+ /* y0l > loss */
+ {
+ TLOSSL; NAMEL = (char *) "y0l";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_Y0_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_RANGE;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case y0_gt_loss:
+ /* y0 > loss */
+ {
+ TLOSSD; NAMED = (char *) "y0";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_Y0_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_RANGE;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case y0f_gt_loss:
+ /* y0f > loss */
+ {
+ TLOSSF; NAMEF = (char *) "y0f";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_Y0_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_RANGE;}
+ }
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case y0l_zero:
+ /* y0l(0) */
+ {
+ DOMAINL; NAMEL = (char *) "y0l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_Y0_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case y0_zero:
+ /* y0(0) */
+ {
+ DOMAIND; NAMED = (char *) "y0";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_Y0_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case y0f_zero:
+ /* y0f(0) */
+ {
+ DOMAINF; NAMEF = (char *) "y0f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_Y0_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case y1l_gt_loss:
+ /* y1l > loss */
+ {
+ TLOSSL; NAMEL = (char *) "y1l";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_Y1_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_RANGE;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case y1_gt_loss:
+ /* y1 > loss */
+ {
+ TLOSSD; NAMED = (char *) "y1";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_Y1_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_RANGE;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case y1f_gt_loss:
+ /* y1f > loss */
+ {
+ TLOSSF; NAMEF = (char *) "y1f";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_Y1_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_RANGE;}
+ }
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case y1l_zero:
+ /* y1l(0) */
+ {
+ DOMAINL; NAMEL = (char *) "y1l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_Y1_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case y1_zero:
+ /* y1(0) */
+ {
+ DOMAIND; NAMED = (char *) "y1";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_Y1_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case y1f_zero:
+ /* y1f(0) */
+ {
+ DOMAINF; NAMEF = (char *) "y1f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_Y1_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case ynl_gt_loss:
+ /* ynl > loss */
+ {
+ TLOSSL; NAMEL = (char *) "ynl";
+ RETVAL_ZEROL;
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_YN_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRL {ERRNO_RANGE;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case yn_gt_loss:
+ /* yn > loss */
+ {
+ TLOSSD; NAMED = (char *) "yn";
+ RETVAL_ZEROD;
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_YN_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRD {ERRNO_RANGE;}
+ }
+ *(double*)retval = exc.retval;
+ break;
+ }
+ case ynf_gt_loss:
+ /* ynf > loss */
+ {
+ TLOSSF; NAMEF = (char *) "ynf";
+ RETVAL_ZEROF;
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_YN_TLOSS;
+ ERRNO_RANGE;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_RANGE;}
+ }
+ *(float*)retval = excf.retval;
+ break;
+ }
+ case ynl_zero:
+ /* ynl(0) */
+ {
+ DOMAINL; NAMEL = (char *) "ynl";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_YN_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case yn_zero:
+ /* yn(0) */
+ {
+ DOMAIND; NAMED = (char *) "yn";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_YN_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case ynf_zero:
+ /* ynf(0) */
+ {
+ DOMAINF; NAMEF = (char *) "ynf";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_YN_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case y0l_negative:
+ /* y0l(x<0) */
+ {
+ DOMAINL; NAMEL = (char *) "y0l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_Y0_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case y0_negative:
+ /* y0(x<0) */
+ {
+ DOMAIND; NAMED = (char *) "y0";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_Y0_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case y0f_negative:
+ /* y0f(x<0) */
+ {
+ DOMAINF; NAMEF = (char *) "y0f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_Y0_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case y1l_negative:
+ /* y1l(x<0) */
+ {
+ DOMAINL; NAMEL = (char *) "y1l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_Y1_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case y1_negative:
+ /* y1(x<0) */
+ {
+ DOMAIND; NAMED = (char *) "y1";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_Y1_NEGATIUE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case y1f_negative:
+ /* y1f(x<0) */
+ {
+ DOMAINF; NAMEF = (char *) "y1f";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_Y1_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case ynl_negative:
+ /* ynl(x<0) */
+ {
+ DOMAINL; NAMEL = (char *) "ynl";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_YN_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case yn_negative:
+ /* yn(x<0) */
+ {
+ DOMAIND; NAMED = (char *) "yn";
+ ifSVID
+ {
+ RETVAL_NEG_HUGED;
+ NOT_MATHERRD
+ {
+ WRITED_YN_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALD;
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case ynf_negative:
+ /* ynf(x<0) */
+ {
+ DOMAINF; NAMEF = (char *) "ynf";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEF;
+ NOT_MATHERRF
+ {
+ WRITEF_YN_NEGATIVE;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALF;
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case fmodl_by_zero:
+ /* fmodl(x,0) */
+ {
+ DOMAINL; NAMEL = (char *) "fmodl";
+ ifSVID
+ {
+ *(long double *)retval = *(long double *)arg1;
+ NOT_MATHERRL
+ {
+ WRITEL_FMOD;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ { /* NaN already computed */
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case fmod_by_zero:
+ /* fmod(x,0) */
+ {
+ DOMAIND; NAMED = (char *) "fmod";
+ ifSVID
+ {
+ *(double *)retval = *(double *)arg1;
+ NOT_MATHERRD
+ {
+ WRITED_FMOD;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ { /* NaN already computed */
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case fmodf_by_zero:
+ /* fmodf(x,0) */
+ {
+ DOMAINF; NAMEF = (char *) "fmodf";
+ ifSVID
+ {
+ *(float *)retval = *(float *)arg1;
+ NOT_MATHERRF
+ {
+ WRITEF_FMOD;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case remainderl_by_zero:
+ /* remainderl(x,0) */
+ {
+ DOMAINL; NAMEL = (char *) "remainderl";
+ ifSVID
+ {
+ NOT_MATHERRL
+ {
+ WRITEL_REM;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ { /* NaN already computed */
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
+ break;
+ }
+ case remainder_by_zero:
+ /* remainder(x,0) */
+ {
+ DOMAIND; NAMED = (char *) "remainder";
+ ifSVID
+ {
+ NOT_MATHERRD
+ {
+ WRITED_REM;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ { /* NaN already computed */
+ NOT_MATHERRD {ERRNO_DOMAIN;}
+ }
+ *(double *)retval = exc.retval;
+ break;
+ }
+ case remainderf_by_zero:
+ /* remainderf(x,0) */
+ {
+ DOMAINF; NAMEF = (char *) "remainderf";
+ ifSVID
+ {
+ NOT_MATHERRF
+ {
+ WRITEF_REM;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ NOT_MATHERRF {ERRNO_DOMAIN;}
+ }
+ *(float *)retval = excf.retval;
+ break;
+ }
+ default:
+ abort();
+ }
+ return;
+ }
+}
diff --git a/sysdeps/ia64/fpu/libm_frexp4.S b/sysdeps/ia64/fpu/libm_frexp4.S
new file mode 100644
index 0000000..f20a00b
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_frexp4.S
@@ -0,0 +1,185 @@
+.file "libm_frexp_4.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 3/20/00: Improved speed
+// 6/01/00: Fixed bug when x a double-extended denormal
+// 12/08/00 Corrected label on .endp
+//
+// API
+//==============================================================
+// double frexp(double x, int* y)
+// double __libm_frexp_4(double x, int* y)
+// where int* y is a 32-bit integer
+//
+// Overview of operation
+//==============================================================
+// break a floating point x number into fraction and an exponent
+// The fraction is returned as a double
+// The exponent is returned as an integer pointed to by y
+// This is a true (not a biased) exponent, but 0xfffe is subtracted
+// as a bias instead of 0xffff. This is because the fraction returned
+// is between 0.5 and 1.0, not the expected IEEE range.
+//
+// The fraction is 0.5 <= fraction < 1.0
+//
+// Registers used
+//==============================================================
+//
+// general registers:
+// r14 signexp (sign set, exponent 0xfffe) of the result for x negative
+// r15 exponent bias (0xfffe); also signexp of the result for x positive
+// r16 signexp of x
+// r17 exponent mask
+// r18 exponent of x
+// r19 exponent result
+// r20 signexp of 2^64
+// r32 on input contains the 64-bit IEEE double that is in f8
+// r33 on input pointer to 32-bit integer for exponent
+//
+// predicate registers:
+// p6 set if x is Nan, zero, or infinity
+// p7 set if x negative
+// p8 set if x positive
+// p9 set if x double-extended denormal
+//
+// floating-point registers:
+// f8 input, output
+// f9 normalized x
+// f10 signexp for significand result for x positive
+// f11 signexp for significand result for x negative
+// f12 2^64
+
+#include "libm_support.h"
+
+.align 32
+.global __libm_frexp_4#
+
+.section .text
+.proc __libm_frexp_4#
+.align 32
+
+__libm_frexp_4:
+// In: f8 = x, r33 = int *y.  Out: f8 = fraction, *y = exponent (32-bit store).
+// r15 = signexp (sign clear, exponent 0xfffe) for the significand result for x>0
+// p6 is set if x is a NaN, zero, or infinity; in that case x is
+// returned unchanged in f8 and 0 is put in the int pointer
+// (the predicated store and return are in a later bundle).
+// r14 = signexp (sign set, exponent 0xfffe) for the significand result for x<0
+{ .mfi
+(p0) mov r15 = 0x0fffe
+(p0) fclass.m.unc p6,p0 = f8, 0xe7
+(p0) mov r14 = 0x2fffe
+}
+// r20 = signexp of 2^64, needed only if x is a double-extended denormal
+// f9 = normalized value of the input
+// The normalization also sets fault flags and takes faults if necessary
+{ .mfi
+(p0) mov r20 = 0x1003f
+(p0) fnorm f9 = f8
+ nop.i 999 ;;
+}
+
+// f10 = signexp for the significand result for x>0 (moved from r15)
+// f12 = 2^64, the scale factor used if x is a double-extended denormal
+{ .mmi
+(p0) setf.exp f10 = r15
+(p0) setf.exp f12 = r20
+ nop.i 999 ;;
+}
+
+// f11 = signexp for the significand result for x<0 (moved from r14)
+// If x is NaN, zero, or infinity (p6), set *y=0 as a 32-bit integer, and exit
+{ .mmb
+(p0) setf.exp f11 = r14
+(p6) st4 [r33] = r0
+(p6) br.ret.spnt b0 ;;
+}
+
+// r17 = exponent mask
+// p7 if x<0, else p8
+{ .mfi
+(p0) mov r17 = 0x1ffff
+(p0) fcmp.lt.unc p7,p8 = f8,f0
+ nop.i 999 ;;
+}
+
+// p9 is set if fnorm(x) is still denormal, which means x is a double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0x0b
+ nop.i 999 ;;
+}
+
+// If x is a double-extended denormal (p9), add 64 to the exponent bias in r15
+// to compensate for multiplying x by 2^64, which makes it normal
+{ .mfi
+(p9) add r15 = 64, r15
+(p9) fmpy f9 = f9, f12
+ nop.i 999 ;;
+}
+
+// The value eventually stored through r33 is the true exponent.
+// The bias is treated as 0xfffe instead of the
+// normal 0xffff because we want the significand
+// to be in the range 0.5 <= sig < 1.0
+// (the store itself is in the last bundle of this procedure).
+
+// If x>0 (p8) form significand result: sign/exponent from f10, significand from f9
+{ .mfi
+ nop.m 999
+(p8) fmerge.se f8 = f10,f9
+ nop.i 999 ;;
+}
+
+// r16 = signexp of normalized (and, under p9, scaled) x
+// If x<0 (p7) form significand result: sign/exponent from f11, significand from f9
+{ .mfi
+(p0) getf.exp r16 = f9
+(p7) fmerge.se f8 = f11,f9
+ nop.i 999 ;;
+}
+
+// r18 = exponent field of normalized x (signexp in r16 masked by r17)
+// r19 = r18 - r15: subtract off the bias to get the true exponent of x
+{ .mmi
+(p0) and r18 = r17,r16 ;;
+(p0) sub r19 = r18,r15
+ nop.i 999 ;;
+}
+
+// Store the exponent in r19 to int y as a 32-bit integer
+// Make the returned value in f8 a double (fnorm.d) and return
+{ .mfb
+(p0) st4 [r33] = r19
+(p0) fnorm.d f8 = f8
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp __libm_frexp_4
+ASM_SIZE_DIRECTIVE(__libm_frexp_4)
diff --git a/sysdeps/ia64/fpu/libm_frexp4f.S b/sysdeps/ia64/fpu/libm_frexp4f.S
new file mode 100644
index 0000000..d94ad09
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_frexp4f.S
@@ -0,0 +1,185 @@
+.file "libm_frexp_4f.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 3/20/00: Improved speed
+// 6/01/00: Fixed bug when x a double-extended denormal
+// 12/08/00 Corrected label on .endp
+//
+// API
+//==============================================================
+// float frexpf(float x, int* y)
+// float __libm_frexp_4f(float x, int* y)
+// where int* y is a 32-bit integer
+//
+// Overview of operation
+//==============================================================
+// break a floating point x number into fraction and an exponent
+// The fraction is returned as a float
+// The exponent is returned as an integer pointed to by y
+// This is a true (not a biased) exponent, but 0xfffe is subtracted
+// as a bias instead of 0xffff. This is because the fraction returned
+// is between 0.5 and 1.0, not the expected IEEE range.
+//
+// The fraction is 0.5 <= fraction < 1.0
+//
+// Registers used
+//==============================================================
+
+// general registers:
+// r14 signexp (sign set, exponent 0xfffe) of the result for x negative
+// r15 exponent bias (0xfffe); also signexp of the result for x positive
+// r16 signexp of x
+// r17 exponent mask
+// r18 exponent of x
+// r19 exponent result
+// r20 signexp of 2^64
+// r32 on input contains the 32-bit IEEE float that is in f8
+// r33 on input pointer to 32-bit integer for exponent
+
+// predicate registers:
+// p6 set if x is Nan, zero, or infinity
+// p7 set if x negative
+// p8 set if x positive
+// p9 set if x double-extended denormal
+
+// floating-point registers:
+// f8 input, output
+// f9 normalized x
+// f10 signexp for significand result for x positive
+// f11 signexp for significand result for x negative
+// f12 2^64
+
+#include "libm_support.h"
+
+.align 32
+.global __libm_frexp_4f#
+
+.section .text
+.proc __libm_frexp_4f#
+.align 32
+
+__libm_frexp_4f:
+// In: f8 = x, r33 = int *y.  Out: f8 = fraction, *y = exponent (32-bit store).
+// r15 = signexp (sign clear, exponent 0xfffe) for the significand result for x>0
+// p6 is set if x is a NaN, zero, or infinity; in that case x is
+// returned unchanged in f8 and 0 is put in the int pointer
+// (the predicated store and return are in a later bundle).
+// r14 = signexp (sign set, exponent 0xfffe) for the significand result for x<0
+{ .mfi
+(p0) mov r15 = 0x0fffe
+(p0) fclass.m.unc p6,p0 = f8, 0xe7
+(p0) mov r14 = 0x2fffe
+}
+// r20 = signexp of 2^64, needed only if x is a double-extended denormal
+// f9 = normalized value of the input
+// The normalization also sets fault flags and takes faults if necessary
+{ .mfi
+(p0) mov r20 = 0x1003f
+(p0) fnorm f9 = f8
+ nop.i 999 ;;
+}
+
+// f10 = signexp for the significand result for x>0 (moved from r15)
+// f12 = 2^64, the scale factor used if x is a double-extended denormal
+{ .mmi
+(p0) setf.exp f10 = r15
+(p0) setf.exp f12 = r20
+ nop.i 999 ;;
+}
+
+// f11 = signexp for the significand result for x<0 (moved from r14)
+// If x is NaN, zero, or infinity (p6), set *y=0 as a 32-bit integer, and exit
+{ .mmb
+(p0) setf.exp f11 = r14
+(p6) st4 [r33] = r0
+(p6) br.ret.spnt b0 ;;
+}
+
+// r17 = exponent mask
+// p7 if x<0, else p8
+{ .mfi
+(p0) mov r17 = 0x1ffff
+(p0) fcmp.lt.unc p7,p8 = f8,f0
+ nop.i 999 ;;
+}
+
+// p9 is set if fnorm(x) is still denormal, which means x is a double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0x0b
+ nop.i 999 ;;
+}
+
+// If x is a double-extended denormal (p9), add 64 to the exponent bias in r15
+// to compensate for multiplying x by 2^64, which makes it normal
+{ .mfi
+(p9) add r15 = 64, r15
+(p9) fmpy f9 = f9, f12
+ nop.i 999 ;;
+}
+
+// The value eventually stored through r33 is the true exponent.
+// The bias is treated as 0xfffe instead of the
+// normal 0xffff because we want the significand
+// to be in the range 0.5 <= sig < 1.0
+// (the store itself is in the last bundle of this procedure).
+
+// If x>0 (p8) form significand result: sign/exponent from f10, significand from f9
+{ .mfi
+ nop.m 999
+(p8) fmerge.se f8 = f10,f9
+ nop.i 999 ;;
+}
+
+// r16 = signexp of normalized (and, under p9, scaled) x
+// If x<0 (p7) form significand result: sign/exponent from f11, significand from f9
+{ .mfi
+(p0) getf.exp r16 = f9
+(p7) fmerge.se f8 = f11,f9
+ nop.i 999 ;;
+}
+
+// r18 = exponent field of normalized x (signexp in r16 masked by r17)
+// r19 = r18 - r15: subtract off the bias to get the true exponent of x
+{ .mmi
+(p0) and r18 = r17,r16 ;;
+(p0) sub r19 = r18,r15
+ nop.i 999 ;;
+}
+
+// Store the exponent in r19 to int y as a 32-bit integer
+// Make the returned value in f8 a float (fnorm.s) and return
+{ .mfb
+(p0) st4 [r33] = r19
+(p0) fnorm.s f8 = f8
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp __libm_frexp_4f
+ASM_SIZE_DIRECTIVE(__libm_frexp_4f)
diff --git a/sysdeps/ia64/fpu/libm_frexp4l.S b/sysdeps/ia64/fpu/libm_frexp4l.S
new file mode 100644
index 0000000..4dfd223
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_frexp4l.S
@@ -0,0 +1,184 @@
+.file "libm_frexp_4l.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 3/20/00: Initial version
+// 6/01/00: Fixed bug when x a double-extended denormal
+// 12/08/00 Corrected label on .endp
+//
+// API
+//==============================================================
+// long double frexpl(long double x, int* y)
+// long double __libm_frexp_4l(long double x, int* y)
+// where int* y is a 32-bit integer
+//
+// Overview of operation
+//==============================================================
+// break a floating point x number into fraction and an exponent
+// The fraction is returned as a long double
+// The exponent is returned as an integer pointed to by y
+// This is a true (not a biased) exponent, but 0xfffe is subtracted
+// as a bias instead of 0xffff. This is because the fraction returned
+// is between 0.5 and 1.0, not the expected IEEE range.
+//
+// The fraction is 0.5 <= fraction < 1.0
+//
+// Registers used
+//==============================================================
+//
+// general registers:
+// r14 exponent bias for x negative
+// r15 exponent bias for x positive
+// r16 signexp of x
+// r17 exponent mask
+// r18 exponent of x
+// r19 exponent result
+// r20 signexp of 2^64
+// r32-33 on input contains the 80-bit IEEE long double that is in f8
+// r34 on input pointer to 32-bit integer for exponent
+//
+// predicate registers:
+// p6 set if x is Nan, zero, or infinity
+// p7 set if x negative
+// p8 set if x positive
+// p9 set if x double-extended denormal
+//
+// floating-point registers:
+// f8 input, output
+// f9 normalized x
+// f10 signexp for significand result for x positive
+// f11 signexp for significand result for x negative
+// f12 2^64
+
+#include "libm_support.h"
+
+.align 32
+.global __libm_frexp_4l#
+
+.section .text
+.proc __libm_frexp_4l#
+.align 32
+// __libm_frexp_4l: split x (in f8) into a fraction of magnitude in [0.5, 1.0) and an exponent.
+__libm_frexp_4l:
+// The fraction is returned in f8; the exponent is stored as a 32-bit int at [r34].
+// r15 = 0x0fffe: signexp (sign clear, biased exponent 0xfffe) for a positive result
+// p6 is set when x is a NaN, zero, or infinity (fclass mask 0xe7);
+// in that case x is returned unchanged in f8 and 0 is stored through
+// the int pointer (done by the predicated store/return two bundles below).
+// r14 = 0x2fffe: the same exponent with the sign bit set, for a negative result
+{ .mfi
+(p0) mov r15 = 0x0fffe
+(p0) fclass.m.unc p6,p0 = f8, 0xe7
+(p0) mov r14 = 0x2fffe
+}
+// r20 = 0x1003f: signexp of 2^64, needed if x is a double-extended denormal
+// f9 = normalized copy of the input
+// The normalization also sets fault flags and takes faults if necessary
+{ .mfi
+(p0) mov r20 = 0x1003f
+(p0) fnorm f9 = f8
+ nop.i 999 ;;
+}
+
+// f10 = FP value carrying the sign/exponent for a positive result
+// f12 = 2^64, the scale factor for the double-extended denormal case
+{ .mmi
+(p0) setf.exp f10 = r15
+(p0) setf.exp f12 = r20
+ nop.i 999 ;;
+}
+
+// f11 = FP value carrying the sign/exponent for a negative result
+// If x is NaN, zero, or infinity (p6): set *y=0 as a 32-bit integer, and exit
+{ .mmb
+(p0) setf.exp f11 = r14
+(p6) st4 [r34] = r0
+(p6) br.ret.spnt b0 ;;
+}
+
+// r17 = 0x1ffff: mask that keeps the exponent field of a signexp and drops the sign
+// p7 is set if x<0, otherwise p8 is set
+{ .mfi
+(p0) mov r17 = 0x1ffff
+(p0) fcmp.lt.unc p7,p8 = f8,f0
+ nop.i 999 ;;
+}
+
+// p9 is set if fnorm(x) is denormal, which means x is a double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9,p0 = f9, 0x0b
+ nop.i 999 ;;
+}
+
+// For a double-extended denormal x (p9): multiply f9 by 2^64 so it becomes normal,
+// and add 64 to the bias in r15 so the stored exponent compensates for that scaling
+{ .mfi
+(p9) add r15 = 64, r15
+(p9) fmpy f9 = f9, f12
+ nop.i 999 ;;
+}
+
+// The true (unbiased) exponent is what gets stored through the int pointer.
+// The bias is treated as 0xfffe instead of the
+// normal 0xffff because we want the significand
+// to be in the range 0.5 <= sig < 1.0
+// The exponent is stored at the pointer in r34 by the last bundle below
+
+// If x>0 (p8): result = significand of f9 combined with the sign/exponent of f10
+{ .mfi
+ nop.m 999
+(p8) fmerge.se f8 = f10,f9
+ nop.i 999 ;;
+}
+
+// r16 = signexp of the normalized (and, for denormals, scaled) x
+// If x<0 (p7): result = significand of f9 combined with the sign/exponent of f11
+{ .mfi
+(p0) getf.exp r16 = f9
+(p7) fmerge.se f8 = f11,f9
+ nop.i 999 ;;
+}
+
+// r18 = biased exponent of normalized x (sign bit masked off by r17)
+// r19 = r18 - bias (r15) = true exponent of x
+{ .mmi
+(p0) and r18 = r17,r16 ;;
+(p0) sub r19 = r18,r15
+ nop.i 999 ;;
+}
+
+// Store the exponent through y as a 32-bit integer
+// Make the result a long double and return
+{ .mfb
+(p0) st4 [r34] = r19
+(p0) fnorm f8 = f8
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp __libm_frexp_4l
+ASM_SIZE_DIRECTIVE(__libm_frexp_4l)
diff --git a/sysdeps/ia64/fpu/libm_reduce.S b/sysdeps/ia64/fpu/libm_reduce.S
new file mode 100644
index 0000000..fb04d36
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_reduce.S
@@ -0,0 +1,1527 @@
+.file "libm_reduce.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History: 02/02/00 Initial Version
+//
+// *********************************************************************
+// *********************************************************************
+//
+// Function: __libm_pi_by_2_reduce(x) return r, c, and N where
+// x = 2*K*pi + N * pi/2 + (r+c) , where |r+c| <= pi/4.
+// This function is not designed to be used by the
+// general user.
+//
+// *********************************************************************
+//
+// Accuracy: Returns double-precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f32-f70
+//
+// General Purpose Registers:
+// r8 = return value N
+// r32 = Address of x
+// r33 = Address of where to place r and then c
+// r34-r64
+//
+// Predicate Registers: p6-p14
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// No conditions should be raised.
+//
+// *********************************************************************
+//
+// I. Introduction
+// ===============
+//
+// For the forward trigonometric functions sin, cos, sincos, and
+// tan, the original algorithms for IA 64 handle arguments up to
+// 1 ulp less than 2^63 in magnitude. For double-extended arguments x,
+// |x| >= 2^63, this routine returns CASE, N and r_hi, r_lo where
+//
+// x is accurately approximated by
+// 2*K*pi + N * pi/2 + r_hi + r_lo, |r_hi+r_lo| <= pi/4.
+// CASE = 1 or 2.
+// CASE is 1 unless |r_hi + r_lo| < 2^(-33).
+//
+// The exact value of K is not determined, but that information is
+// not required in trigonometric function computations.
+//
+// We first assume the argument x in question satisfies x >= 2^(63).
+// In particular, it is positive. Negative x can be handled by symmetry:
+//
+// -x is accurately approximated by
+// -2*K*pi + (-N) * pi/2 - (r_hi + r_lo), |r_hi+r_lo| <= pi/4.
+//
+// The idea of the reduction is that
+//
+// x * 2/pi = N_big + N + f, |f| <= 1/2
+//
+// Moreover, for double extended x, |f| >= 2^(-75). (This is a
+// non-obvious fact found by enumeration using a special algorithm
+// involving continued fractions.) The algorithm described below
+// calculates N and an accurate approximation of f.
+//
+// Roughly speaking, an appropriate 256-bit (4 X 64) portion of
+// 2/pi is multiplied with x to give the desired information.
+//
+// II. Representation of 2/PI
+// ==========================
+//
+// The value of 2/pi in binary fixed-point is
+//
+// .101000101111100110......
+//
+// We store 2/pi in a table, starting at the position corresponding
+// to bit position 63
+//
+// bit position 63 62 ... 0 -1 -2 -3 -4 -5 -6 -7 .... -16576
+//
+// 0 0 ... 0 . 1 0 1 0 0 0 1 .... X
+//
+// ^
+// |__ implied binary pt
+//
+// III. Algorithm
+// ==============
+//
+// This describes the algorithm in the most natural way using
+// unsigned integer multiplication. The implementation section
+// describes how the integer arithmetic is simulated.
+//
+// STEP 0. Initialization
+// ----------------------
+//
+// Let the input argument x be
+//
+// x = 2^m * ( 1. b_1 b_2 b_3 ... b_63 ), 63 <= m <= 16383.
+//
+// The first crucial step is to fetch four 64-bit portions of 2/pi.
+// To fulfill this goal, we calculate the bit position L of the
+// beginning of this 256-bit quantity by
+//
+// L := 62 - m.
+//
+// Note that -16321 <= L <= -1 because 63 <= m <= 16383; and that
+// the storage of 2/pi is adequate.
+//
+// Fetch P_1, P_2, P_3, P_4 beginning at bit position L thus:
+//
+// bit position L L-1 L-2 ... L-63
+//
+// P_1 = b b b ... b
+//
+// each b can be 0 or 1. Also, let P_0 be the two bits corresponding to
+// bit positions L+2 and L+1. So, when each of the P_j is interpreted
+// with appropriate scaling, we have
+//
+// 2/pi = P_big + P_0 + (P_1 + P_2 + P_3 + P_4) + P_small
+//
+// Note that P_big and P_small can be ignored. The reasons are as follows.
+// First, consider P_big. If P_big = 0, we can certainly ignore it.
+// Otherwise, P_big >= 2^(L+3). Now,
+//
+// P_big * ulp(x) >= 2^(L+3) * 2^(m-63)
+// >= 2^(65-m + m-63 )
+// >= 2^2
+//
+// Thus, P_big * x is an integer of the form 4*K. So
+//
+// x = 4*K * (pi/2) + x*(P_0 + P_1 + P_2 + P_3 + P_4)*(pi/2)
+// + x*P_small*(pi/2).
+//
+// Hence, P_big*x corresponds to information that can be ignored for
+// trigonometric function evaluation.
+//
+// Next, we must estimate the effect of ignoring P_small. The absolute
+// error made by ignoring P_small is bounded by
+//
+// |P_small * x| <= ulp(P_4) * x
+// <= 2^(L-255) * 2^(m+1)
+// <= 2^(62-m-255 + m + 1)
+// <= 2^(-192)
+//
+// Since for double-extended precision, x * 2/pi = integer + f,
+// 0.5 >= |f| >= 2^(-75), the relative error introduced by ignoring
+// P_small is bounded by 2^(-192+75) <= 2^(-117), which is acceptable.
+//
+// Further note that if x is split into x_hi + x_lo where x_lo is the
+// two bits corresponding to bit positions 2^(m-62) and 2^(m-63); then
+//
+// P_0 * x_hi
+//
+// is also an integer of the form 4*K; and thus can also be ignored.
+// Let M := P_0 * x_lo which is a small integer. The main part of the
+// calculation is really the multiplication of x with the four pieces
+// P_1, P_2, P_3, and P_4.
+//
+// Unless the reduced argument is extremely small in magnitude, it
+// suffices to carry out the multiplication of x with P_1, P_2, and
+// P_3. x*P_4 will be carried out and added on as a correction only
+// when it is found to be needed. Note also that x*P_4 need not be
+// computed exactly. A straightforward multiplication suffices since
+// the rounding error thus produced would be bounded by 2^(-3*64),
+// that is 2^(-192) which is small enough as the reduced argument
+// is bounded from below by 2^(-75).
+//
+// Now that we have four 64-bit data representing 2/pi and a
+// 64-bit x. We first need to calculate a highly accurate product
+// of x and P_1, P_2, P_3. This is best understood as integer
+// multiplication.
+//
+//
+// STEP 1. Multiplication
+// ----------------------
+//
+//
+// --------- --------- ---------
+// | P_1 | | P_2 | | P_3 |
+// --------- --------- ---------
+//
+// ---------
+// X | X |
+// ---------
+// ----------------------------------------------------
+//
+// --------- ---------
+// | A_hi | | A_lo |
+// --------- ---------
+//
+//
+// --------- ---------
+// | B_hi | | B_lo |
+// --------- ---------
+//
+//
+// --------- ---------
+// | C_hi | | C_lo |
+// --------- ---------
+//
+// ====================================================
+// --------- --------- --------- ---------
+// | S_0 | | S_1 | | S_2 | | S_3 |
+// --------- --------- --------- ---------
+//
+//
+//
+// STEP 2. Get N and f
+// -------------------
+//
+// Conceptually, after the individual pieces S_0, S_1, ..., are obtained,
+// we have to sum them and obtain an integer part, N, and a fraction, f.
+// Here, |f| <= 1/2, and N is an integer. Note also that N need only to
+// be known modulo 2^k, k >= 2. In the case when |f| is small enough,
+// we would need to add in the value x*P_4.
+//
+//
+// STEP 3. Get reduced argument
+// ----------------------------
+//
+// The value f is not yet the reduced argument that we seek. The
+// equation
+//
+// x * 2/pi = 4K + N + f
+//
+// says that
+//
+// x = 2*K*pi + N * pi/2 + f * (pi/2).
+//
+// Thus, the reduced argument is given by
+//
+// reduced argument = f * pi/2.
+//
+// This multiplication must be performed to extra precision.
+//
+// IV. Implementation
+// ==================
+//
+// Step 0. Initialization
+// ----------------------
+//
+// Set sgn_x := sign(x); x := |x|; x_lo := 2 lsb of x.
+//
+// In memory, 2/pi is stored contiguously as
+//
+// 0x00000000 0x00000000 0xA2F....
+// ^
+// |__ implied binary bit
+//
+// Given x = 2^m * 1.xxxx...xxx; we calculate L := 62 - m. Thus
+// -16321 <= L <= -1. We fetch from memory 5 integer pieces of data.
+//
+// P_0 is the two bits corresponding to bit positions L+2 and L+1
+// P_1 is the 64-bit starting at bit position L
+// P_2 is the 64-bit starting at bit position L-64
+// P_3 is the 64-bit starting at bit position L-128
+// P_4 is the 64-bit starting at bit position L-192
+//
+// For example, if m = 63, P_0 would be 0 and P_1 would look like
+// 0xA2F...
+//
+// If m = 65, P_0 would be the two msb of 0xA, thus, P_0 is 10 in binary.
+// P_1 in binary would be 1 0 0 0 1 0 1 1 1 1 ....
+//
+// Step 1. Multiplication
+// ----------------------
+//
+// At this point, P_1, P_2, P_3, P_4 are integers. They are
+// supposed to be interpreted as
+//
+// 2^(L-63) * P_1;
+// 2^(L-63-64) * P_2;
+// 2^(L-63-128) * P_3;
+// 2^(L-63-192) * P_4;
+//
+// Since each of them need to be multiplied to x, we would scale
+// both x and the P_j's by some convenient factors: scale each
+// of P_j's up by 2^(63-L), and scale x down by 2^(L-63).
+//
+// p_1 := fcvt.xf ( P_1 )
+// p_2 := fcvt.xf ( P_2 ) * 2^(-64)
+// p_3 := fcvt.xf ( P_3 ) * 2^(-128)
+// p_4 := fcvt.xf ( P_4 ) * 2^(-192)
+// x := replace exponent of x by -1
+// because 2^m * 1.xxxx...xxx * 2^(L-63)
+// is 2^(-1) * 1.xxxx...xxx
+//
+// We are now faced with the task of computing the following
+//
+// --------- --------- ---------
+// | P_1 | | P_2 | | P_3 |
+// --------- --------- ---------
+//
+// ---------
+// X | X |
+// ---------
+// ----------------------------------------------------
+//
+// --------- ---------
+// | A_hi | | A_lo |
+// --------- ---------
+//
+// --------- ---------
+// | B_hi | | B_lo |
+// --------- ---------
+//
+// --------- ---------
+// | C_hi | | C_lo |
+// --------- ---------
+//
+// ====================================================
+// ----------- --------- --------- ---------
+// | S_0 | | S_1 | | S_2 | | S_3 |
+// ----------- --------- --------- ---------
+// ^ ^
+// | |___ binary point
+// |
+// |___ possibly one more bit
+//
+// Let FPSR3 be set to round towards zero with widest precision
+// and exponent range. Unless an explicit FPSR is given,
+// round-to-nearest with widest precision and exponent range is
+// used.
+//
+// Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
+//
+// Tmp_C := fmpy.fpsr3( x, p_1 );
+// If Tmp_C >= sigma_C then
+// C_hi := Tmp_C;
+// C_lo := x*p_1 - C_hi ...fma, exact
+// Else
+// C_hi := fadd.fpsr3(sigma_C, Tmp_C) - sigma_C
+// ...subtraction is exact, regardless
+// ...of rounding direction
+// C_lo := x*p_1 - C_hi ...fma, exact
+// End If
+//
+// Tmp_B := fmpy.fpsr3( x, p_2 );
+// If Tmp_B >= sigma_B then
+// B_hi := Tmp_B;
+// B_lo := x*p_2 - B_hi ...fma, exact
+// Else
+// B_hi := fadd.fpsr3(sigma_B, Tmp_B) - sigma_B
+// ...subtraction is exact, regardless
+// ...of rounding direction
+// B_lo := x*p_2 - B_hi ...fma, exact
+// End If
+//
+// Tmp_A := fmpy.fpsr3( x, p_3 );
+// If Tmp_A >= sigma_A then
+// A_hi := Tmp_A;
+// A_lo := x*p_3 - A_hi ...fma, exact
+// Else
+// A_hi := fadd.fpsr3(sigma_A, Tmp_A) - sigma_A
+// ...subtraction is exact, regardless
+// ...of rounding direction
+// A_lo := x*p_3 - A_hi ...fma, exact
+// End If
+//
+// ...Note that C_hi is of integer value. We need only the
+// ...last few bits. Thus we can ensure C_hi is never a big
+// ...integer, freeing us from overflow worry.
+//
+// Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);
+// ...Tmp_C is the upper portion of C_hi
+// C_hi := C_hi - Tmp_C
+// ...0 <= C_hi < 2^7
+//
+// Step 2. Get N and f
+// -------------------
+//
+// At this point, we have all the components to obtain
+// S_0, S_1, S_2, S_3 and thus N and f. We start by adding
+// C_lo and B_hi. This sum together with C_hi gives a good
+// estimation of N and f.
+//
+// A := fadd.fpsr3( B_hi, C_lo )
+// B := max( B_hi, C_lo )
+// b := min( B_hi, C_lo )
+//
+// a := (B - A) + b ...exact. Note that a is either 0
+// ...or 2^(-64).
+//
+// N := round_to_nearest_integer_value( A );
+// f := A - N; ...exact because lsb(A) >= 2^(-64)
+// ...and |f| <= 1/2.
+//
+// f := f + a ...exact because a is 0 or 2^(-64);
+// ...the msb of the sum is <= 1/2
+// ...lsb >= 2^(-64).
+//
+// N := convert to integer format( C_hi + N );
+// M := P_0 * x_lo;
+// N := N + M;
+//
+// If sgn_x == 1 (that is original x was negative)
+// N := 2^10 - N
+// ...this maintains N to be non-negative, but still
+// ...equivalent to the (negated N) mod 4.
+// End If
+//
+// If |f| >= 2^(-33)
+//
+// ...Case 1
+// CASE := 1
+// g := A_hi + B_lo;
+// s_hi := f + g;
+// s_lo := (f - s_hi) + g;
+//
+// Else
+//
+// ...Case 2
+// CASE := 2
+// A := fadd.fpsr3( A_hi, B_lo )
+// B := max( A_hi, B_lo )
+// b := min( A_hi, B_lo )
+//
+// a := (B - A) + b ...exact. Note that a is either 0
+// ...or 2^(-128).
+//
+// f_hi := A + f;
+// f_lo := (f - f_hi) + A;
+// ...this is exact.
+// ...f-f_hi is exact because either |f| >= |A|, in which
+// ...case f-f_hi is clearly exact; or otherwise, 0<|f|<|A|
+// ...means msb(f) <= msb(A) = 2^(-64) => |f| = 2^(-64).
+// ...If f = 2^(-64), f-f_hi involves cancellation and is
+// ...exact. If f = -2^(-64), then A + f is exact. Hence
+// ...f-f_hi is -A exactly, giving f_lo = 0.
+//
+// f_lo := f_lo + a;
+//
+// If |f| >= 2^(-50) then
+// s_hi := f_hi;
+// s_lo := f_lo;
+// Else
+// f_lo := (f_lo + A_lo) + x*p_4
+// s_hi := f_hi + f_lo
+// s_lo := (f_hi - s_hi) + f_lo
+// End If
+//
+// End If
+//
+// Step 3. Get reduced argument
+// ----------------------------
+//
+// If sgn_x == 0 (that is original x is positive)
+//
+// D_hi := Pi_by_2_hi
+// D_lo := Pi_by_2_lo
+// ...load from table
+//
+// Else
+//
+// D_hi := neg_Pi_by_2_hi
+// D_lo := neg_Pi_by_2_lo
+// ...load from table
+// End If
+//
+// r_hi := s_hi*D_hi
+// r_lo := s_hi*D_hi - r_hi ...fma
+// r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi
+//
+// Return CASE, N, r_hi, r_lo
+//
+
+#include "libm_support.h"
+// Floating-point register aliases (f32-f70) for the quantities named in the algorithm description above
+FR_X = f32
+FR_N = f33
+FR_p_1 = f34
+FR_TWOM33 = f35
+FR_TWOM50 = f36
+FR_g = f37
+FR_p_2 = f38
+FR_f = f39
+FR_s_lo = f40
+FR_p_3 = f41
+FR_f_abs = f42
+FR_D_lo = f43
+FR_p_4 = f44
+FR_D_hi = f45
+FR_Tmp2_C = f46
+FR_s_hi = f47
+FR_sigma_A = f48
+FR_A = f49
+FR_sigma_B = f50
+FR_B = f51
+FR_sigma_C = f52
+FR_b = f53
+FR_ScaleP2 = f54
+FR_ScaleP3 = f55
+FR_ScaleP4 = f56
+FR_Tmp_A = f57
+FR_Tmp_B = f58
+FR_Tmp_C = f59
+FR_A_hi = f60
+FR_f_hi = f61
+FR_r_hi = f62
+FR_A_lo = f63
+FR_B_hi = f64
+FR_a = f65
+FR_B_lo = f66
+FR_f_lo = f67
+FR_r_lo = f68
+FR_C_hi = f69
+FR_C_lo = f70
+// General-register aliases: r8 is the return value N, r32/r33 are the input and output addresses
+GR_N = r8
+GR_Address_of_Input = r32
+GR_Address_of_Outputs = r33
+GR_Exp_x = r36
+GR_Temp = r37
+GR_BIASL63 = r38
+GR_CASE = r39
+GR_x_lo = r40
+GR_sgn_x = r41
+GR_M = r42
+GR_BASE = r43
+GR_LENGTH1 = r44
+GR_LENGTH2 = r45
+GR_ASUB = r46
+GR_P_0 = r47
+GR_P_1 = r48
+GR_P_2 = r49
+GR_P_3 = r50
+GR_P_4 = r51
+GR_START = r52
+GR_SEGMENT = r53
+GR_A = r54
+GR_B = r55
+GR_C = r56
+GR_D = r57
+GR_E = r58
+GR_TEMP1 = r59
+GR_TEMP2 = r60
+GR_TEMP3 = r61
+GR_TEMP4 = r62
+GR_TEMP5 = r63
+GR_TEMP6 = r64
+
+.align 64
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+// Bits of 2/pi as consecutive 64-bit words: one all-zero word, then the fraction bits starting 0xA2F9836E...
+Constants_Bits_of_2_by_pi:
+ASM_TYPE_DIRECTIVE(Constants_Bits_of_2_by_pi,@object)
+data8 0x0000000000000000,0xA2F9836E4E441529
+data8 0xFC2757D1F534DDC0,0xDB6295993C439041
+data8 0xFE5163ABDEBBC561,0xB7246E3A424DD2E0
+data8 0x06492EEA09D1921C,0xFE1DEB1CB129A73E
+data8 0xE88235F52EBB4484,0xE99C7026B45F7E41
+data8 0x3991D639835339F4,0x9C845F8BBDF9283B
+data8 0x1FF897FFDE05980F,0xEF2F118B5A0A6D1F
+data8 0x6D367ECF27CB09B7,0x4F463F669E5FEA2D
+data8 0x7527BAC7EBE5F17B,0x3D0739F78A5292EA
+data8 0x6BFB5FB11F8D5D08,0x56033046FC7B6BAB
+data8 0xF0CFBC209AF4361D,0xA9E391615EE61B08
+data8 0x6599855F14A06840,0x8DFFD8804D732731
+data8 0x06061556CA73A8C9,0x60E27BC08C6B47C4
+data8 0x19C367CDDCE8092A,0x8359C4768B961CA6
+data8 0xDDAF44D15719053E,0xA5FF07053F7E33E8
+data8 0x32C2DE4F98327DBB,0xC33D26EF6B1E5EF8
+data8 0x9F3A1F35CAF27F1D,0x87F121907C7C246A
+data8 0xFA6ED5772D30433B,0x15C614B59D19C3C2
+data8 0xC4AD414D2C5D000C,0x467D862D71E39AC6
+data8 0x9B0062337CD2B497,0xA7B4D55537F63ED7
+data8 0x1810A3FC764D2A9D,0x64ABD770F87C6357
+data8 0xB07AE715175649C0,0xD9D63B3884A7CB23
+data8 0x24778AD623545AB9,0x1F001B0AF1DFCE19
+data8 0xFF319F6A1E666157,0x9947FBACD87F7EB7
+data8 0x652289E83260BFE6,0xCDC4EF09366CD43F
+data8 0x5DD7DE16DE3B5892,0x9BDE2822D2E88628
+data8 0x4D58E232CAC616E3,0x08CB7DE050C017A7
+data8 0x1DF35BE01834132E,0x6212830148835B8E
+data8 0xF57FB0ADF2E91E43,0x4A48D36710D8DDAA
+data8 0x425FAECE616AA428,0x0AB499D3F2A6067F
+data8 0x775C83C2A3883C61,0x78738A5A8CAFBDD7
+data8 0x6F63A62DCBBFF4EF,0x818D67C12645CA55
+data8 0x36D9CAD2A8288D61,0xC277C9121426049B
+data8 0x4612C459C444C5C8,0x91B24DF31700AD43
+data8 0xD4E5492910D5FDFC,0xBE00CC941EEECE70
+data8 0xF53E1380F1ECC3E7,0xB328F8C79405933E
+data8 0x71C1B3092EF3450B,0x9C12887B20AB9FB5
+data8 0x2EC292472F327B6D,0x550C90A7721FE76B
+data8 0x96CB314A1679E279,0x4189DFF49794E884
+data8 0xE6E29731996BED88,0x365F5F0EFDBBB49A
+data8 0x486CA46742727132,0x5D8DB8159F09E5BC
+data8 0x25318D3974F71C05,0x30010C0D68084B58
+data8 0xEE2C90AA4702E774,0x24D6BDA67DF77248
+data8 0x6EEF169FA6948EF6,0x91B45153D1F20ACF
+data8 0x3398207E4BF56863,0xB25F3EDD035D407F
+data8 0x8985295255C06437,0x10D86D324832754C
+data8 0x5BD4714E6E5445C1,0x090B69F52AD56614
+data8 0x9D072750045DDB3B,0xB4C576EA17F9877D
+data8 0x6B49BA271D296996,0xACCCC65414AD6AE2
+data8 0x9089D98850722CBE,0xA4049407777030F3
+data8 0x27FC00A871EA49C2,0x663DE06483DD9797
+data8 0x3FA3FD94438C860D,0xDE41319D39928C70
+data8 0xDDE7B7173BDF082B,0x3715A0805C93805A
+data8 0x921110D8E80FAF80,0x6C4BFFDB0F903876
+data8 0x185915A562BBCB61,0xB989C7BD401004F2
+data8 0xD2277549F6B6EBBB,0x22DBAA140A2F2689
+data8 0x768364333B091A94,0x0EAA3A51C2A31DAE
+data8 0xEDAF12265C4DC26D,0x9C7A2D9756C0833F
+data8 0x03F6F0098C402B99,0x316D07B43915200C
+data8 0x5BC3D8C492F54BAD,0xC6A5CA4ECD37A736
+data8 0xA9E69492AB6842DD,0xDE6319EF8C76528B
+data8 0x6837DBFCABA1AE31,0x15DFA1AE00DAFB0C
+data8 0x664D64B705ED3065,0x29BF56573AFF47B9
+data8 0xF96AF3BE75DF9328,0x3080ABF68C6615CB
+data8 0x040622FA1DE4D9A4,0xB33D8F1B5709CD36
+data8 0xE9424EA4BE13B523,0x331AAAF0A8654FA5
+data8 0xC1D20F3F0BCD785B,0x76F923048B7B7217
+data8 0x8953A6C6E26E6F00,0xEBEF584A9BB7DAC4
+data8 0xBA66AACFCF761D02,0xD12DF1B1C1998C77
+data8 0xADC3DA4886A05DF7,0xF480C62FF0AC9AEC
+data8 0xDDBC5C3F6DDED01F,0xC790B6DB2A3A25A3
+data8 0x9AAF009353AD0457,0xB6B42D297E804BA7
+data8 0x07DA0EAA76A1597B,0x2A12162DB7DCFDE5
+data8 0xFAFEDB89FDBE896C,0x76E4FCA90670803E
+data8 0x156E85FF87FD073E,0x2833676186182AEA
+data8 0xBD4DAFE7B36E6D8F,0x3967955BBF3148D7
+data8 0x8416DF30432DC735,0x6125CE70C9B8CB30
+data8 0xFD6CBFA200A4E46C,0x05A0DD5A476F21D2
+data8 0x1262845CB9496170,0xE0566B0152993755
+data8 0x50B7D51EC4F1335F,0x6E13E4305DA92E85
+data8 0xC3B21D3632A1A4B7,0x08D4B1EA21F716E4
+data8 0x698F77FF2780030C,0x2D408DA0CD4F99A5
+data8 0x20D3A2B30A5D2F42,0xF9B4CBDA11D0BE7D
+data8 0xC1DB9BBD17AB81A2,0xCA5C6A0817552E55
+data8 0x0027F0147F8607E1,0x640B148D4196DEBE
+data8 0x872AFDDAB6256B34,0x897BFEF3059EBFB9
+data8 0x4F6A68A82A4A5AC4,0x4FBCF82D985AD795
+data8 0xC7F48D4D0DA63A20,0x5F57A4B13F149538
+data8 0x800120CC86DD71B6,0xDEC9F560BF11654D
+data8 0x6B0701ACB08CD0C0,0xB24855510EFB1EC3
+data8 0x72953B06A33540C0,0x7BDC06CC45E0FA29
+data8 0x4EC8CAD641F3E8DE,0x647CD8649B31BED9
+data8 0xC397A4D45877C5E3,0x6913DAF03C3ABA46
+data8 0x18465F7555F5BDD2,0xC6926E5D2EACED44
+data8 0x0E423E1C87C461E9,0xFD29F3D6E7CA7C22
+data8 0x35916FC5E0088DD7,0xFFE26A6EC6FDB0C1
+data8 0x0893745D7CB2AD6B,0x9D6ECD7B723E6A11
+data8 0xC6A9CFF7DF7329BA,0xC9B55100B70DB2E2
+data8 0x24BA74607DE58AD8,0x742C150D0C188194
+data8 0x667E162901767A9F,0xBEFDFDEF4556367E
+data8 0xD913D9ECB9BA8BFC,0x97C427A831C36EF1
+data8 0x36C59456A8D8B5A8,0xB40ECCCF2D891234
+data8 0x576F89562CE3CE99,0xB920D6AA5E6B9C2A
+data8 0x3ECC5F114A0BFDFB,0xF4E16D3B8E2C86E2
+data8 0x84D4E9A9B4FCD1EE,0xEFC9352E61392F44
+data8 0x2138C8D91B0AFC81,0x6A4AFBD81C2F84B4
+data8 0x538C994ECC2254DC,0x552AD6C6C096190B
+data8 0xB8701A649569605A,0x26EE523F0F117F11
+data8 0xB5F4F5CBFC2DBC34,0xEEBC34CC5DE8605E
+data8 0xDD9B8E67EF3392B8,0x17C99B5861BC57E1
+data8 0xC68351103ED84871,0xDDDD1C2DA118AF46
+data8 0x2C21D7F359987AD9,0xC0549EFA864FFC06
+data8 0x56AE79E536228922,0xAD38DC9367AAE855
+data8 0x3826829BE7CAA40D,0x51B133990ED7A948
+data8 0x0569F0B265A7887F,0x974C8836D1F9B392
+data8 0x214A827B21CF98DC,0x9F405547DC3A74E1
+data8 0x42EB67DF9DFE5FD4,0x5EA4677B7AACBAA2
+data8 0xF65523882B55BA41,0x086E59862A218347
+data8 0x39E6E389D49EE540,0xFB49E956FFCA0F1C
+data8 0x8A59C52BFA94C5C1,0xD3CFC50FAE5ADB86
+data8 0xC5476243853B8621,0x94792C8761107B4C
+data8 0x2A1A2C8012BF4390,0x2688893C78E4C4A8
+data8 0x7BDBE5C23AC4EAF4,0x268A67F7BF920D2B
+data8 0xA365B1933D0B7CBD,0xDC51A463DD27DDE1
+data8 0x6919949A9529A828,0xCE68B4ED09209F44
+data8 0xCA984E638270237C,0x7E32B90F8EF5A7E7
+data8 0x561408F1212A9DB5,0x4D7E6F5119A5ABF9
+data8 0xB5D6DF8261DD9602,0x36169F3AC4A1A283
+data8 0x6DED727A8D39A9B8,0x825C326B5B2746ED
+data8 0x34007700D255F4FC,0x4D59018071E0E13F
+data8 0x89B295F364A8F1AE,0xA74B38FC4CEAB2BB
+ASM_SIZE_DIRECTIVE(Constants_Bits_of_2_by_pi)
+// pi/2 as a hi/lo pair; each 16-byte entry is: significand low word, significand high word, sign/exponent, pad (NOTE(review): presumably read with ldfe - the loads are outside this view)
+Constants_Bits_of_pi_by_2:
+ASM_TYPE_DIRECTIVE(Constants_Bits_of_pi_by_2,@object)
+data4 0x2168C234,0xC90FDAA2,0x00003FFF,0x00000000
+data4 0x80DC1CD1,0xC4C6628B,0x00003FBF,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Bits_of_pi_by_2)
+
+.section .text
+.proc __libm_pi_by_2_reduce#
+.global __libm_pi_by_2_reduce#
+.align 64
+
+__libm_pi_by_2_reduce:
+
+// X is at the address in Address_of_Input
+// Place the two-piece result at the address in Address_of_Outputs
+// r followed by c
+// N is returned
+
+{ .mmf
+alloc r34 = ar.pfs,2,34,0,0
+(p0) ldfe FR_X = [GR_Address_of_Input]
+(p0) fsetc.s3 0x00,0x7F ;;
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_BIASL63 = 0x1003E
+}
+;;
+
+
+// L -1-2-3-4
+// 0 0 0 0 0. 1 0 1 0
+// M 0 1 2 .... 63, 64 65 ... 127, 128
+// ---------------------------------------------
+// Segment 0. 1 , 2 , 3
+// START = M - 63 M = 128 becomes 65
+// LENGTH1 = START & 0x3F 65 become position 1
+// SEGMENT = shr(START,6) + 1 0 maps to 1, 64 maps to 2,
+// LENGTH2 = 64 - LENGTH1
+// Address_BASE = shladd(SEGMENT,3) + BASE
+
+
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_BASE = @ltoff(Constants_Bits_of_2_by_pi#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_BASE = [GR_BASE]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_TEMP5 = 0x000000000000FFFE
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) setf.exp FR_sigma_B = GR_TEMP5
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_TEMP6 = 0x000000000000FFBE ;;
+}
+// Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
+{ .mfi
+(p0) setf.exp FR_sigma_A = GR_TEMP6
+ nop.f 999
+ nop.i 999 ;;
+}
+// Special Code for testing DE arguments
+// (p0) movl GR_BIASL63 = 0x0000000000013FFE
+// (p0) movl GR_x_lo = 0xFFFFFFFFFFFFFFFF
+// (p0) setf.exp FR_X = GR_BIASL63
+// (p0) setf.sig FR_ScaleP3 = GR_x_lo
+// (p0) fmerge.se FR_X = FR_X,FR_ScaleP3
+// Set sgn_x := sign(x); x := |x|; x_lo := 2 lsb of x.
+// 2/pi is stored contiguously as
+// 0x00000000 0x00000000.0xA2F....
+// M = EXP - BIAS ( M >= 63)
+// Given x = 2^m * 1.xxxx...xxx; we calculate L := 62 - m.
+// Thus -16321 <= L <= -1.
+{ .mmf
+(p0) getf.exp GR_Exp_x = FR_X
+(p0) getf.sig GR_x_lo = FR_X
+(p0) fabs FR_X = FR_X ;;
+}
+{ .mii
+(p0) and GR_x_lo = 0x03,GR_x_lo
+(p0) extr.u GR_M = GR_Exp_x,0,17 ;;
+(p0) sub GR_START = GR_M,GR_BIASL63
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) and GR_LENGTH1 = 0x3F,GR_START
+(p0) shr.u GR_SEGMENT = GR_START,6
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) add GR_SEGMENT = 0x1,GR_SEGMENT
+(p0) sub GR_LENGTH2 = 0x40,GR_LENGTH1
+}
+// P_0 is the two bits corresponding to bit positions L+2 and L+1
+// P_1 is the 64-bit starting at bit position L
+// P_2 is the 64-bit starting at bit position L-64
+// P_3 is the 64-bit starting at bit position L-128
+// P_4 is the 64-bit starting at bit position L-192
+// P_1 is made up of Alo and Bhi
+// P_1 = deposit Alo, position 0, length2 into P_1,position length1
+// deposit Bhi, position length2, length1 into P_1, position 0
+// P_2 is made up of Blo and Chi
+// P_2 = deposit Blo, position 0, length2 into P_2, position length1
+// deposit Chi, position length2, length1 into P_2, position 0
+// P_3 is made up of Clo and Dhi
+// P_3 = deposit Clo, position 0, length2 into P_3, position length1
+// deposit Dhi, position length2, length1 into P_3, position 0
+// P_4 is made up of Dlo and Ehi
+// P_4 = deposit Dlo, position 0, length2 into P_4, position length1
+// deposit Ehi, position length2, length1 into P_4, position 0
+{ .mmi
+(p0) cmp.le.unc p6,p7 = 0x2,GR_LENGTH1 ;;
+(p0) shladd GR_BASE = GR_SEGMENT,3,GR_BASE
+(p7) cmp.eq.unc p8,p9 = 0x1,GR_LENGTH1 ;;
+}
+{ .mmi
+ nop.m 999
+// ld_64 A at Base and increment Base by 8
+// ld_64 B at Base and increment Base by 8
+// ld_64 C at Base and increment Base by 8
+// ld_64 D at Base and increment Base by 8
+// ld_64 E at Base and increment Base by 8
+// A/B/C/D
+// ---------------------
+// A, B, C, D, and E look like | length1 | length2 |
+// ---------------------
+// hi lo
+(p0) ld8 GR_A = [GR_BASE],8
+(p0) extr.u GR_sgn_x = GR_Exp_x,17,1 ;;
+}
+{ .mmf
+ nop.m 999
+(p0) ld8 GR_B = [GR_BASE],8
+(p0) fmerge.se FR_X = FR_sigma_B,FR_X ;;
+}
+{ .mii
+(p0) ld8 GR_C = [GR_BASE],8
+(p8) extr.u GR_Temp = GR_A,63,1 ;;
+(p0) shl GR_TEMP1 = GR_A,GR_LENGTH1
+}
+{ .mii
+(p0) ld8 GR_D = [GR_BASE],8
+// If length1 >= 2,
+// P_0 = deposit Ahi, position length2, 2 bit into P_0 at position 0.
+(p6) shr.u GR_P_0 = GR_A,GR_LENGTH2 ;;
+(p0) shl GR_TEMP2 = GR_B,GR_LENGTH1
+}
+{ .mii
+(p0) ld8 GR_E = [GR_BASE],-40
+(p0) shr.u GR_P_1 = GR_B,GR_LENGTH2 ;;
+(p0) shr.u GR_P_2 = GR_C,GR_LENGTH2
+}
+// Else
+// Load 16 bit of ASUB from (Base_Address_of_A - 2)
+// P_0 = ASUB & 0x3
+// If length1 == 0,
+// P_0 complete
+// Else
+// Deposit element 63 from Ahi and place in element 0 of P_0.
+// Endif
+// Endif
+{ .mii
+(p7) ld2 GR_ASUB = [GR_BASE],8
+(p0) shl GR_TEMP3 = GR_C,GR_LENGTH1 ;;
+(p0) shl GR_TEMP4 = GR_D,GR_LENGTH1
+}
+{ .mii
+ nop.m 999
+(p0) shr.u GR_P_3 = GR_D,GR_LENGTH2 ;;
+(p0) shr.u GR_P_4 = GR_E,GR_LENGTH2
+}
+{ .mii
+(p7) and GR_P_0 = 0x03,GR_ASUB
+(p6) and GR_P_0 = 0x03,GR_P_0 ;;
+(p0) or GR_P_1 = GR_P_1,GR_TEMP1
+}
+{ .mmi
+(p8) and GR_P_0 = 0x1,GR_P_0 ;;
+(p0) or GR_P_2 = GR_P_2,GR_TEMP2
+(p8) shl GR_P_0 = GR_P_0,0x1 ;;
+}
+{ .mii
+ nop.m 999
+(p0) or GR_P_3 = GR_P_3,GR_TEMP3
+(p8) or GR_P_0 = GR_P_0,GR_Temp
+}
+{ .mmi
+(p0) setf.sig FR_p_1 = GR_P_1 ;;
+(p0) setf.sig FR_p_2 = GR_P_2
+(p0) or GR_P_4 = GR_P_4,GR_TEMP4 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) setf.sig FR_p_3 = GR_P_3
+(p0) pmpy2.r GR_M = GR_P_0,GR_x_lo
+}
+{ .mlx
+(p0) setf.sig FR_p_4 = GR_P_4
+// P_1, P_2, P_3, P_4 are integers. They should be
+// 2^(L-63) * P_1;
+// 2^(L-63-64) * P_2;
+// 2^(L-63-128) * P_3;
+// 2^(L-63-192) * P_4;
+// Since each of them need to be multiplied to x, we would scale
+// both x and the P_j's by some convenient factors: scale each
+// of P_j's up by 2^(63-L), and scale x down by 2^(L-63).
+// p_1 := fcvt.xf ( P_1 )
+// p_2 := fcvt.xf ( P_2 ) * 2^(-64)
+// p_3 := fcvt.xf ( P_3 ) * 2^(-128)
+// p_4 := fcvt.xf ( P_4 ) * 2^(-192)
+// x= Set x's exp to -1 because 2^m*1.x...x *2^(L-63)=2^(-1)*1.x...xxx
+// --------- --------- ---------
+// | P_1 | | P_2 | | P_3 |
+// --------- --------- ---------
+// ---------
+// X | X |
+// ---------
+// ----------------------------------------------------
+// --------- ---------
+// | A_hi | | A_lo |
+// --------- ---------
+// --------- ---------
+// | B_hi | | B_lo |
+// --------- ---------
+// --------- ---------
+// | C_hi | | C_lo |
+// --------- ---------
+// ====================================================
+// ----------- --------- --------- ---------
+// | S_0 | | S_1 | | S_2 | | S_3 |
+// ----------- --------- --------- ---------
+// | |___ binary point
+// |___ possibly one more bit
+//
+// Let FPSR3 be set to round towards zero with widest precision
+// and exponent range. Unless an explicit FPSR is given,
+// round-to-nearest with widest precision and exponent range is
+// used.
+(p0) movl GR_TEMP1 = 0x000000000000FFBF
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) setf.exp FR_ScaleP2 = GR_TEMP1
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_TEMP4 = 0x000000000001003E
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) setf.exp FR_sigma_C = GR_TEMP4
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_TEMP2 = 0x000000000000FF7F ;;
+}
+{ .mmf
+ nop.m 999
+(p0) setf.exp FR_ScaleP3 = GR_TEMP2
+(p0) fcvt.xuf.s1 FR_p_1 = FR_p_1 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcvt.xuf.s1 FR_p_2 = FR_p_2
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Temp = 0x000000000000FFDE ;;
+}
+{ .mmf
+ nop.m 999
+(p0) setf.exp FR_TWOM33 = GR_Temp
+(p0) fcvt.xuf.s1 FR_p_3 = FR_p_3 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcvt.xuf.s1 FR_p_4 = FR_p_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// Tmp_C := fmpy.fpsr3( x, p_1 );
+// Tmp_B := fmpy.fpsr3( x, p_2 );
+// Tmp_A := fmpy.fpsr3( x, p_3 );
+// If Tmp_C >= sigma_C then
+// C_hi := Tmp_C;
+// C_lo := x*p_1 - C_hi ...fma, exact
+// Else
+// C_hi := fadd.fpsr3(sigma_C, Tmp_C) - sigma_C
+// C_lo := x*p_1 - C_hi ...fma, exact
+// End If
+// If Tmp_B >= sigma_B then
+// B_hi := Tmp_B;
+// B_lo := x*p_2 - B_hi ...fma, exact
+// Else
+// B_hi := fadd.fpsr3(sigma_B, Tmp_B) - sigma_B
+// B_lo := x*p_2 - B_hi ...fma, exact
+// End If
+// If Tmp_A >= sigma_A then
+// A_hi := Tmp_A;
+// A_lo := x*p_3 - A_hi ...fma, exact
+// Else
+// A_hi := fadd.fpsr3(sigma_A, Tmp_A) - sigma_A
+// Exact, regardless ...of rounding direction
+// A_lo := x*p_3 - A_hi ...fma, exact
+// Endif
+(p0) fmpy.s3 FR_Tmp_C = FR_X,FR_p_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Temp = 0x0000000000000400
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_TEMP3 = 0x000000000000FF3F ;;
+}
+{ .mmf
+ nop.m 999
+(p0) setf.exp FR_ScaleP4 = GR_TEMP3
+(p0) fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3 ;;
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_TEMP4 = 0x0000000000010045 ;;
+}
+{ .mmf
+ nop.m 999
+(p0) setf.exp FR_Tmp2_C = GR_TEMP4
+(p0) fmpy.s3 FR_Tmp_B = FR_X,FR_p_2 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p12, p9 = FR_Tmp_C,FR_sigma_C
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s3 FR_Tmp_A = FR_X,FR_p_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) mov FR_C_hi = FR_Tmp_C
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) addl GR_BASE = @ltoff(Constants_Bits_of_pi_by_2#), gp
+(p9) fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C
+ nop.i 999
+}
+;;
+
+
+
+// End If
+// Step 3. Get reduced argument
+// If sgn_x == 0 (that is original x is positive)
+// D_hi := Pi_by_2_hi
+// D_lo := Pi_by_2_lo
+// Load from table
+// Else
+// D_hi := neg_Pi_by_2_hi
+// D_lo := neg_Pi_by_2_lo
+// Load from table
+// End If
+
+
+{ .mmi
+ ld8 GR_BASE = [GR_BASE]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+(p0) ldfe FR_D_hi = [GR_BASE],16
+(p0) fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfe FR_D_lo = [GR_BASE],0
+(p0) fcmp.ge.unc.s1 p13, p10 = FR_Tmp_B,FR_sigma_B
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) mov FR_B_hi = FR_Tmp_B
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p14, p11 = FR_Tmp_A,FR_sigma_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) mov FR_A_hi = FR_Tmp_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
+(p0) cmp.eq.unc p12,p9 = 0x1,GR_sgn_x
+}
+{ .mfi
+ nop.m 999
+(p13) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+// Note that C_hi is of integer value. We need only the
+// last few bits. Thus we can ensure C_hi is never a big
+// integer, freeing us from overflow worry.
+// Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);
+// Tmp_C is the upper portion of C_hi
+(p0) fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// *******************
+// Step 2. Get N and f
+// *******************
+// We have all the components to obtain
+// S_0, S_1, S_2, S_3 and thus N and f. We start by adding
+// C_lo and B_hi. This sum together with C_hi estimates
+// N and f well.
+// A := fadd.fpsr3( B_hi, C_lo )
+// B := max( B_hi, C_lo )
+// b := min( B_hi, C_lo )
+(p0) fadd.s3 FR_A = FR_B_hi,FR_C_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmax.s1 FR_B = FR_B_hi,FR_C_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmin.s1 FR_b = FR_B_hi,FR_C_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// N := round_to_nearest_integer_value( A );
+(p0) fcvt.fx.s1 FR_N = FR_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7
+(p0) fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).
+(p0) fsub.s1 FR_a = FR_B,FR_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.
+(p0) fnorm.s1 FR_N = FR_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_a = FR_a,FR_b
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 FR_f = FR_A,FR_N
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+// N := convert to integer format( C_hi + N );
+// M := P_0 * x_lo;
+// N := N + M;
+(p0) fadd.s1 FR_N = FR_N,FR_C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// f = f + a Exact because a is 0 or 2^(-64);
+// the msb of the sum is <= 1/2 and lsb >= 2^(-64).
+(p0) fadd.s1 FR_f = FR_f,FR_a
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Create 2**(-33)
+//
+(p0) fcvt.fx.s1 FR_N = FR_N
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fabs FR_f_abs = FR_f
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) getf.sig GR_N = FR_N
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) add GR_N = GR_N,GR_M ;;
+}
+// If sgn_x == 1 (that is original x was negative)
+// N := 2^10 - N
+// this maintains N to be non-negative, but still
+// equivalent to the (negated N) mod 4.
+// End If
+{ .mii
+(p12) sub GR_N = GR_Temp,GR_N
+(p0) cmp.eq.unc p12,p9 = 0x0,GR_sgn_x ;;
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.ge.unc.s1 p13, p10 = FR_f_abs,FR_TWOM33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 FR_D_hi = f0, FR_D_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fadd.s3 FR_A = FR_A_hi,FR_B_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 FR_g = FR_A_hi,FR_B_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fmax.s1 FR_B = FR_A_hi,FR_B_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 FR_D_lo = f0, FR_D_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fmin.s1 FR_b = FR_A_hi,FR_B_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p10) movl GR_Temp = 0x000000000000FFCD ;;
+}
+{ .mmf
+ nop.m 999
+(p10) setf.exp FR_TWOM50 = GR_Temp
+(p10) fadd.s1 FR_f_hi = FR_A,FR_f ;;
+}
+{ .mfi
+ nop.m 999
+// a := (B - A) + b Exact.
+// Note that a is either 0 or 2^(-128).
+// f_hi := A + f;
+// f_lo := (f - f_hi) + A
+// f_lo=f-f_hi is exact because either |f| >= |A|, in which
+// case f-f_hi is clearly exact; or otherwise, 0<|f|<|A|
+// means msb(f) <= msb(A) = 2^(-64) => |f| = 2^(-64).
+// If f = 2^(-64), f-f_hi involves cancellation and is
+// exact. If f = -2^(-64), then A + f is exact. Hence
+// f-f_hi is -A exactly, giving f_lo = 0.
+// f_lo := f_lo + a;
+(p10) fsub.s1 FR_a = FR_B,FR_A
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 FR_s_hi = FR_f,FR_g
+ nop.i 999 ;;
+}
+{ .mlx
+ nop.m 999
+// If |f| >= 2^(-33)
+// Case 1
+// CASE := 1
+// g := A_hi + B_lo;
+// s_hi := f + g;
+// s_lo := (f - s_hi) + g;
+(p13) movl GR_CASE = 0x1 ;;
+}
+{ .mlx
+ nop.m 999
+// Else
+// Case 2
+// CASE := 2
+// A := fadd.fpsr3( A_hi, B_lo )
+// B := max( A_hi, B_lo )
+// b := min( A_hi, B_lo )
+(p10) movl GR_CASE = 0x2
+}
+{ .mfi
+ nop.m 999
+(p10) fsub.s1 FR_f_lo = FR_f,FR_f_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fadd.s1 FR_a = FR_a,FR_b
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fsub.s1 FR_s_lo = FR_f,FR_s_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 FR_s_lo = FR_s_lo,FR_g
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Create 2**(-50)
+(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// If |f| >= 2^(-50) then
+// s_hi := f_hi;
+// s_lo := f_lo;
+// Else
+// f_lo := (f_lo + A_lo) + x*p_4
+// s_hi := f_hi + f_lo
+// s_lo := (f_hi - s_hi) + f_lo
+// End If
+(p14) mov FR_s_hi = FR_f_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_a
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) mov FR_s_lo = FR_f_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// r_hi := s_hi*D_hi
+// r_lo := s_hi*D_hi - r_hi with fma
+// r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi
+(p0) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo
+ nop.i 999 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+// Return N, r_hi, r_lo
+// We do not return CASE
+(p0) stfe [GR_Address_of_Outputs] = FR_r_hi,16
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo
+ nop.i 999 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) stfe [GR_Address_of_Outputs] = FR_r_lo,-16
+ nop.i 999
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp __libm_pi_by_2_reduce
+ASM_SIZE_DIRECTIVE(__libm_pi_by_2_reduce)
diff --git a/sysdeps/ia64/fpu/libm_support.h b/sysdeps/ia64/fpu/libm_support.h
new file mode 100644
index 0000000..995b104
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_support.h
@@ -0,0 +1,339 @@
+//
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+
+// History: 02/02/2000 Initial version
+// 2/28/2000 added tags for logb and nextafter
+// 3/22/2000 Changes to support _LIB_VERSION variable
+// and filled some enum gaps. Added support for C99.
+// 5/31/2000 added prototypes for __libm_frexp_4l/8l
+// 8/10/2000 Changed declaration of _LIB_VERSION to work for library
+// builds and other application builds (precompiler directives).
+// 8/11/2000 Added pointers-to-matherr-functions declarations to allow
+// for user-defined matherr functions in the dll build.
+// 12/07/2000 Added scalbn error_types values.
+//
+
+#ifndef ASSEMBLER
+#include <math.h>
+
+float __libm_frexp_4f( float x, int* exp);
+float __libm_frexp_8f( float x, int* exp);
+double __libm_frexp_4( double x, int* exp);
+double __libm_frexp_8( double x, int* exp);
+long double __libm_frexp_4l( long double x, int* exp);
+long double __libm_frexp_8l( long double x, int* exp);
+void __libm_sincos_pi4(double,double*,double*,int);
+void __libm_y0y1(double , double *, double *);
+void __libm_j0j1(double , double *, double *);
+double __libm_lgamma_kernel(double,int*,int,int);
+double __libm_j0(double);
+double __libm_j1(double);
+double __libm_jn(int,double);
+double __libm_y0(double);
+double __libm_y1(double);
+double __libm_yn(int,double);
+
+extern double rint(double);
+extern double sqrt(double);
+extern double fabs(double);
+extern double log(double);
+extern double log1p(double);
+extern double sqrt(double);
+extern double sin(double);
+extern double exp(double);
+extern double modf(double, double *);
+extern double asinh(double);
+extern double acosh(double);
+extern double atanh(double);
+extern double tanh(double);
+extern double erf(double);
+extern double erfc(double);
+extern double j0(double);
+extern double j1(double);
+extern double jn(int, double);
+extern double y0(double);
+extern double y1(double);
+extern double yn(int, double);
+
+extern float fabsf(float);
+extern float asinhf(float);
+extern float acoshf(float);
+extern float atanhf(float);
+extern float tanhf(float);
+extern float erff(float);
+extern float erfcf(float);
+extern float j0f(float);
+extern float j1f(float);
+extern float jnf(int, float);
+extern float y0f(float);
+extern float y1f(float);
+extern float ynf(int, float);
+
+extern long double log1pl(long double);
+extern long double logl(long double);
+extern long double sqrtl(long double);
+extern long double expl(long double);
+
+extern long lround(double);
+extern long lroundf(float);
+extern long lroundl(long double);
+
+#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
+ #error integer size not established; define SIZE_INT_32 or SIZE_INT_64
+#endif
+
+struct fp64 { /* sign:1 exponent:11 significand:52 (implied leading 1) */
+ unsigned lo_significand:32;
+ unsigned hi_significand:20;
+ unsigned exponent:11;
+ unsigned sign:1;
+};
+
+#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
+#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
+
+typedef enum
+{
+ logl_zero=0, logl_negative, /* 0, 1 */
+ log_zero, log_negative, /* 2, 3 */
+ logf_zero, logf_negative, /* 4, 5 */
+ log10l_zero, log10l_negative, /* 6, 7 */
+ log10_zero, log10_negative, /* 8, 9 */
+ log10f_zero, log10f_negative, /* 10, 11 */
+ expl_overflow, expl_underflow, /* 12, 13 */
+ exp_overflow, exp_underflow, /* 14, 15 */
+ expf_overflow, expf_underflow, /* 16, 17 */
+ powl_overflow, powl_underflow, /* 18, 19 */
+ powl_zero_to_zero, /* 20 */
+ powl_zero_to_negative, /* 21 */
+ powl_neg_to_non_integer, /* 22 */
+ powl_nan_to_zero, /* 23 */
+ pow_overflow, pow_underflow, /* 24, 25 */
+ pow_zero_to_zero, /* 26 */
+ pow_zero_to_negative, /* 27 */
+ pow_neg_to_non_integer, /* 28 */
+ pow_nan_to_zero, /* 29 */
+ powf_overflow, powf_underflow, /* 30, 31 */
+ powf_zero_to_zero, /* 32 */
+ powf_zero_to_negative, /* 33 */
+ powf_neg_to_non_integer, /* 34 */
+ powf_nan_to_zero, /* 35 */
+ atan2l_zero, /* 36 */
+ atan2_zero, /* 37 */
+ atan2f_zero, /* 38 */
+ expm1l_overflow, /* 39 */
+ expm1l_underflow, /* 40 */
+ expm1_overflow, /* 41 */
+ expm1_underflow, /* 42 */
+ expm1f_overflow, /* 43 */
+ expm1f_underflow, /* 44 */
+ hypotl_overflow, /* 45 */
+ hypot_overflow, /* 46 */
+ hypotf_overflow, /* 47 */
+ sqrtl_negative, /* 48 */
+ sqrt_negative, /* 49 */
+ sqrtf_negative, /* 50 */
+ scalbl_overflow, scalbl_underflow, /* 51, 52 */
+ scalb_overflow, scalb_underflow, /* 53, 54 */
+ scalbf_overflow, scalbf_underflow, /* 55, 56 */
+ acosl_gt_one, acos_gt_one, acosf_gt_one, /* 57, 58, 59 */
+ asinl_gt_one, asin_gt_one, asinf_gt_one, /* 60, 61, 62 */
+ coshl_overflow, cosh_overflow, coshf_overflow, /* 63, 64, 65 */
+ y0l_zero, y0l_negative,y0l_gt_loss, /* 66, 67, 68 */
+ y0_zero, y0_negative,y0_gt_loss, /* 69, 70, 71 */
+ y0f_zero, y0f_negative,y0f_gt_loss, /* 72, 73, 74 */
+ y1l_zero, y1l_negative,y1l_gt_loss, /* 75, 76, 77 */
+ y1_zero, y1_negative,y1_gt_loss, /* 78, 79, 80 */
+ y1f_zero, y1f_negative,y1f_gt_loss, /* 81, 82, 83 */
+ ynl_zero, ynl_negative,ynl_gt_loss, /* 84, 85, 86 */
+ yn_zero, yn_negative,yn_gt_loss, /* 87, 88, 89 */
+ ynf_zero, ynf_negative,ynf_gt_loss, /* 90, 91, 92 */
+ j0l_gt_loss, /* 93 */
+ j0_gt_loss, /* 94 */
+ j0f_gt_loss, /* 95 */
+ j1l_gt_loss, /* 96 */
+ j1_gt_loss, /* 97 */
+ j1f_gt_loss, /* 98 */
+ jnl_gt_loss, /* 99 */
+ jn_gt_loss, /* 100 */
+ jnf_gt_loss, /* 101 */
+ lgammal_overflow, lgammal_negative,lgammal_reserve, /* 102, 103, 104 */
+ lgamma_overflow, lgamma_negative,lgamma_reserve, /* 105, 106, 107 */
+ lgammaf_overflow, lgammaf_negative, lgammaf_reserve,/* 108, 109, 110 */
+ gammal_overflow,gammal_negative, gammal_reserve, /* 111, 112, 113 */
+ gamma_overflow, gamma_negative, gamma_reserve, /* 114, 115, 116 */
+ gammaf_overflow,gammaf_negative,gammaf_reserve, /* 117, 118, 119 */
+ fmodl_by_zero, /* 120 */
+ fmod_by_zero, /* 121 */
+ fmodf_by_zero, /* 122 */
+ remainderl_by_zero, /* 123 */
+ remainder_by_zero, /* 124 */
+ remainderf_by_zero, /* 125 */
+ sinhl_overflow, sinh_overflow, sinhf_overflow, /* 126, 127, 128 */
+ atanhl_gt_one, atanhl_eq_one, /* 129, 130 */
+ atanh_gt_one, atanh_eq_one, /* 131, 132 */
+ atanhf_gt_one, atanhf_eq_one, /* 133, 134 */
+ acoshl_lt_one, /* 135 */
+ acosh_lt_one, /* 136 */
+ acoshf_lt_one, /* 137 */
+ log1pl_zero, log1pl_negative, /* 138, 139 */
+ log1p_zero, log1p_negative, /* 140, 141 */
+ log1pf_zero, log1pf_negative, /* 142, 143 */
+ ldexpl_overflow, ldexpl_underflow, /* 144, 145 */
+ ldexp_overflow, ldexp_underflow, /* 146, 147 */
+ ldexpf_overflow, ldexpf_underflow, /* 148, 149 */
+ logbl_zero, logb_zero, logbf_zero, /* 150, 151, 152 */
+ nextafterl_overflow, nextafter_overflow,
+ nextafterf_overflow, /* 153, 154, 155 */
+ ilogbl_zero, ilogb_zero, ilogbf_zero, /* 156, 157, 158 */
+ exp2l_overflow, exp2l_underflow, /* 159, 160 */
+ exp2_overflow, exp2_underflow, /* 161, 162 */
+ exp2f_overflow, exp2f_underflow, /* 163, 164 */
+ exp10l_overflow, exp10_overflow,
+ exp10f_overflow, /* 165, 166, 167 */
+ log2l_zero, log2l_negative, /* 168, 169 */
+ log2_zero, log2_negative, /* 170, 171 */
+ log2f_zero, log2f_negative, /* 172, 173 */
+ scalbnl_overflow, scalbnl_underflow, /* 174, 175 */
+ scalbn_overflow, scalbn_underflow, /* 176, 177 */
+ scalbnf_overflow, scalbnf_underflow /* 178, 179 */
+} error_types;
+
+void __libm_error_support(void*,void*,void*,error_types);
+
+#define BIAS_64 1023
+#define EXPINF_64 2047
+
+#define DOUBLE_HEX(HI, LO) 0x ## LO, 0x ## HI
+
+#if 0
+static const unsigned INF[] = {
+ DOUBLE_HEX(7ff00000, 00000000),
+ DOUBLE_HEX(fff00000, 00000000)
+};
+
+static const double _zeroo = 0.0;
+static const double _bigg = 1.0e300;
+static const double _ponee = 1.0;
+static const double _nonee = -1.0;
+
+#define INVALID (_zeroo * *((double*)&INF[0]))
+#define PINF *((double*)&INF[0])
+#define NINF -PINF
+#define PINF_DZ (_ponee/_zeroo)
+#define X_TLOSS 1.41484755040568800000e+16
+#endif
+
+struct exceptionf
+{
+ int type;
+ char *name;
+ float arg1, arg2, retval;
+};
+
+# ifdef __cplusplus
+struct __exception
+{
+ int type;
+ char *name;
+ double arg1, arg2, retval;
+};
+# else
+
+# ifndef _LIBC
+struct exception
+{
+ int type;
+ char *name;
+ double arg1, arg2, retval;
+};
+# endif
+# endif
+
+
+
+struct exceptionl
+{
+ int type;
+ char *name;
+ long double arg1, arg2, retval;
+};
+
+#ifdef _MS_
+#define MATHERR_F _matherrf
+#define MATHERR_D _matherr
+#else
+#define MATHERR_F matherrf
+#define MATHERR_D matherr
+#endif
+
+# ifdef __cplusplus
+// exception is a reserved name in C++, so the struct is named __exception there
+#define EXC_DECL_D __exception
+#else
+#define EXC_DECL_D exception
+#endif
+
+extern int MATHERR_F(struct exceptionf*);
+extern int MATHERR_D(struct EXC_DECL_D*);
+extern int matherrl(struct exceptionl*);
+
+
+/* Set these appropriately to make the library thread-safe */
+#define ERRNO_RANGE errno = ERANGE
+#define ERRNO_DOMAIN errno = EDOM
+
+
+// Add code to support _LIB_VERSION
+#ifndef _LIBC
+typedef enum
+{
+ _IEEE_ = -1, // IEEE-like behavior
+ _SVID_, // SysV, Rel. 4 behavior
+ _XOPEN_, // Unix98
+ _POSIX_, // Posix
+ _ISOC_ // ISO C9X
+} _LIB_VERSION_TYPE;
+
+extern _LIB_VERSION_TYPE _LIB_VERSION;
+#endif
+
+// This is a run-time variable and may affect
+// floating point behavior of the libm functions
+
+#endif /* ASSEMBLER */
+
+/* Support for compatible assembler handling. */
+#if !defined L && defined _LIBC
+#define L(name) .L##name
+#endif
+#ifdef __ELF__
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
+#define ASM_TYPE_DIRECTIVE(name,T) .type name,T
+#else
+#define ASM_SIZE_DIRECTIVE(name)
+#define ASM_TYPE_DIRECTIVE(name,T)
+#endif
diff --git a/sysdeps/ia64/fpu/libm_tan.S b/sysdeps/ia64/fpu/libm_tan.S
new file mode 100644
index 0000000..c587d64
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_tan.S
@@ -0,0 +1,3319 @@
+.file "libm_tan.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+// 02/02/00 Initial Version
+// 4/04/00 Unwind support added
+// 12/28/00 Fixed false invalid flags
+//
+// *********************************************************************
+//
+// Function: tan(x) = tangent(x), for double precision x values
+//
+// *********************************************************************
+//
+// Accuracy: Very accurate for double-precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9-f15
+// f32-f112
+//
+// General Purpose Registers:
+// r32-r48
+// r49-r50 (Used to pass arguments to pi_by_2 reduce routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions do not occur
+// Underflow exceptions raised when appropriate for tan
+// (No specialized error handling for this routine)
+// Inexact raised when appropriate by algorithm
+//
+// tan(SNaN) = QNaN
+// tan(QNaN) = QNaN
+// tan(inf) = QNaN
+// tan(+/-0) = +/-0
+//
+// *********************************************************************
+//
+// Mathematical Description
+//
+// We consider the computation of FPTAN of Arg. Now, given
+//
+// Arg = N pi/2 + alpha, |alpha| <= pi/4,
+//
+// basic mathematical relationship shows that
+//
+// tan( Arg ) = tan( alpha ) if N is even;
+// = -cot( alpha ) otherwise.
+//
+// The value of alpha is obtained by argument reduction and
+// represented by two working precision numbers r and c where
+//
+// alpha = r + c accurately.
+//
+// The reduction method is described in a previous write up.
+// The argument reduction scheme identifies 4 cases. For Cases 2
+// and 4, because |alpha| is small, tan(r+c) and -cot(r+c) can be
+// computed very easily by 2 or 3 terms of the Taylor series
+// expansion as follows:
+//
+// Case 2:
+// -------
+//
+// tan(r + c) = r + c + r^3/3 ...accurately
+// -cot(r + c) = -1/(r+c) + r/3 ...accurately
+//
+// Case 4:
+// -------
+//
+// tan(r + c) = r + c + r^3/3 + 2r^5/15 ...accurately
+// -cot(r + c) = -1/(r+c) + r/3 + r^3/45 ...accurately
+//
+//
+// The only cases left are Cases 1 and 3 of the argument reduction
+// procedure. These two cases will be merged since after the
+// argument is reduced in either case, we have the reduced argument
+// represented as r + c and that the magnitude |r + c| is not small
+// enough to allow the usage of a very short approximation.
+//
+// The greatest challenge of this task is that the second terms of
+// the Taylor series for tan(r) and -cot(r)
+//
+// r + r^3/3 + 2 r^5/15 + ...
+//
+// and
+//
+// -1/r + r/3 + r^3/45 + ...
+//
+// are not very small when |r| is close to pi/4 and the rounding
+// errors will be a concern if simple polynomial accumulation is
+// used. When |r| < 2^(-2), however, the second terms will be small
+// enough (5 bits or so of right shift) that a normal Horner
+// recurrence suffices. Hence there are two cases that we consider
+// in the accurate computation of tan(r) and cot(r), |r| <= pi/4.
+//
+// Case small_r: |r| < 2^(-2)
+// --------------------------
+//
+// Since Arg = N pi/2 + r + c accurately, we have
+//
+// tan(Arg) = tan(r+c) for N even,
+// = -cot(r+c) otherwise.
+//
+// Here for this case, both tan(r) and -cot(r) can be approximated
+// by simple polynomials:
+//
+// tan(r) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
+// -cot(r) = -1/r + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+//
+// accurately. Since |r| is relatively small, tan(r+c) and
+// -cot(r+c) can be accurately approximated by replacing r with
+// r+c only in the first two terms of the corresponding polynomials.
+//
+// Note that P1_1 (and Q1_1 for that matter) approximates 1/3 to
+// almost 64 sig. bits, thus
+//
+// P1_1 (r+c)^3 = P1_1 r^3 + c * r^2 accurately.
+//
+// Hence,
+//
+// tan(r+c) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
+// + c*(1 + r^2)
+//
+// -cot(r+c) = -1/(r+c) + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+// + Q1_1*c
+//
+//
+// Case normal_r: 2^(-2) <= |r| <= pi/4
+// ------------------------------------
+//
+// This case is more likely than the previous one if one considers
+// r to be uniformly distributed in [-pi/4 pi/4].
+//
+// The required calculation is either
+//
+// tan(r + c) = tan(r) + correction, or
+// -cot(r + c) = -cot(r) + correction.
+//
+// Specifically,
+//
+// tan(r + c) = tan(r) + c tan'(r) + O(c^2)
+// = tan(r) + c sec^2(r) + O(c^2)
+// = tan(r) + c SEC_sq ...accurately
+// as long as SEC_sq approximates sec^2(r)
+// to, say, 5 bits or so.
+//
+// Similarly,
+//
+// -cot(r + c) = -cot(r) - c cot'(r) + O(c^2)
+// = -cot(r) + c csc^2(r) + O(c^2)
+// = -cot(r) + c CSC_sq ...accurately
+// as long as CSC_sq approximates csc^2(r)
+// to, say, 5 bits or so.
+//
+// We therefore concentrate on accurately calculating tan(r) and
+// cot(r) for a working-precision number r, |r| <= pi/4 to within
+// 0.1% or so.
+//
+// We will employ a table-driven approach. Let
+//
+// r = sgn_r * 2^k * 1.b_1 b_2 ... b_5 ... b_63
+// = sgn_r * ( B + x )
+//
+// where
+//
+// B = 2^k * 1.b_1 b_2 ... b_5 1
+// x = |r| - B
+//
+// Now,
+// tan(B) + tan(x)
+// tan( B + x ) = ------------------------
+// 1 - tan(B)*tan(x)
+//
+// / \
+// | tan(B) + tan(x) |
+
+// = tan(B) + | ------------------------ - tan(B) |
+// | 1 - tan(B)*tan(x) |
+// \ /
+//
+// sec^2(B) * tan(x)
+// = tan(B) + ------------------------
+// 1 - tan(B)*tan(x)
+//
+// (1/[sin(B)*cos(B)]) * tan(x)
+// = tan(B) + --------------------------------
+// cot(B) - tan(x)
+//
+//
+// Clearly, the values of tan(B), cot(B) and 1/(sin(B)*cos(B)) are
+// calculated beforehand and stored in a table. Since
+//
+// |x| <= 2^k * 2^(-6) <= 2^(-7) (because k = -1, -2)
+//
+// a very short polynomial will be sufficient to approximate tan(x)
+// accurately. The details involved in computing the last expression
+// will be given in the next section on algorithm description.
+//
+//
+// Now, we turn to the case where cot( B + x ) is needed.
+//
+//
+// 1 - tan(B)*tan(x)
+// cot( B + x ) = ------------------------
+// tan(B) + tan(x)
+//
+// / \
+// | 1 - tan(B)*tan(x) |
+
+// = cot(B) + | ----------------------- - cot(B) |
+// | tan(B) + tan(x) |
+// \ /
+//
+// [tan(B) + cot(B)] * tan(x)
+// = cot(B) - ----------------------------
+// tan(B) + tan(x)
+//
+// (1/[sin(B)*cos(B)]) * tan(x)
+// = cot(B) - --------------------------------
+// tan(B) + tan(x)
+//
+//
+// Note that the values of tan(B), cot(B) and 1/(sin(B)*cos(B)) that
+// are needed are the same set of values needed in the previous
+// case.
+//
+// Finally, we can put all the ingredients together as follows:
+//
+// Arg = N * pi/2 + r + c ...accurately
+//
+// tan(Arg) = tan(r) + correction if N is even;
+// = -cot(r) + correction otherwise.
+//
+// For Cases 2 and 4,
+//
+// Case 2:
+// tan(Arg) = tan(r + c) = r + c + r^3/3 N even
+// = -cot(r + c) = -1/(r+c) + r/3 N odd
+// Case 4:
+// tan(Arg) = tan(r + c) = r + c + r^3/3 + 2r^5/15 N even
+// = -cot(r + c) = -1/(r+c) + r/3 + r^3/45 N odd
+//
+//
+// For Cases 1 and 3,
+//
+// Case small_r: |r| < 2^(-2)
+//
+// tan(Arg) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
+// + c*(1 + r^2) N even
+//
+// = -1/(r+c) + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+// + Q1_1*c N odd
+//
+// Case normal_r: 2^(-2) <= |r| <= pi/4
+//
+// tan(Arg) = tan(r) + c * sec^2(r) N even
+// = -cot(r) + c * csc^2(r) otherwise
+//
+// For N even,
+//
+// tan(Arg) = tan(r) + c*sec^2(r)
+// = tan( sgn_r * (B+x) ) + c * sec^2(|r|)
+// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(|r|) )
+// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(B) )
+//
+// since B approximates |r| to 2^(-6) in relative accuracy.
+//
+// / (1/[sin(B)*cos(B)]) * tan(x)
+// tan(Arg) = sgn_r * | tan(B) + --------------------------------
+// \ cot(B) - tan(x)
+// \
+// + CORR |
+
+// /
+// where
+//
+// CORR = sgn_r*c*tan(B)*SC_inv(B); SC_inv(B) = 1/(sin(B)*cos(B)).
+//
+// For N odd,
+//
+// tan(Arg) = -cot(r) + c*csc^2(r)
+// = -cot( sgn_r * (B+x) ) + c * csc^2(|r|)
+// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(|r|) )
+// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(B) )
+//
+// since B approximates |r| to 2^(-6) in relative accuracy.
+//
+// / (1/[sin(B)*cos(B)]) * tan(x)
+// tan(Arg) = sgn_r * | -cot(B) + --------------------------------
+// \ tan(B) + tan(x)
+// \
+// + CORR |
+
+// /
+// where
+//
+// CORR = sgn_r*c*cot(B)*SC_inv(B); SC_inv(B) = 1/(sin(B)*cos(B)).
+//
+//
+// The actual algorithm prescribes how all the mathematical formulas
+// are calculated.
+//
+//
+// 2. Algorithmic Description
+// ==========================
+//
+// 2.1 Computation for Cases 2 and 4.
+// ----------------------------------
+//
+// For Case 2, we use two-term polynomials.
+//
+// For N even,
+//
+// rsq := r * r
+// Result := c + r * rsq * P1_1
+// Result := r + Result ...in user-defined rounding
+//
+// For N odd,
+// S_hi := -frcpa(r) ...8 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...16 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...32 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...64 bits
+// S_lo := S_hi*( (1 + S_hi*r) + S_hi*c )
+// ...S_hi + S_lo is -1/(r+c) to extra precision
+// S_lo := S_lo + Q1_1*r
+//
+// Result := S_hi + S_lo ...in user-defined rounding
+//
+// For Case 4, we use three-term polynomials
+//
+// For N even,
+//
+// rsq := r * r
+// Result := c + r * rsq * (P1_1 + rsq * P1_2)
+// Result := r + Result ...in user-defined rounding
+//
+// For N odd,
+// S_hi := -frcpa(r) ...8 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...16 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...32 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...64 bits
+// S_lo := S_hi*( (1 + S_hi*r) + S_hi*c )
+// ...S_hi + S_lo is -1/(r+c) to extra precision
+// rsq := r * r
+// P := Q1_1 + rsq*Q1_2
+// S_lo := S_lo + r*P
+//
+// Result := S_hi + S_lo ...in user-defined rounding
+//
+//
+// Note that the coefficients P1_1, P1_2, Q1_1, and Q1_2 are
+// the same as those used in the small_r case of Cases 1 and 3
+// below.
+//
+//
+// 2.2 Computation for Cases 1 and 3.
+// ----------------------------------
+// This is further divided into the case of small_r,
+// where |r| < 2^(-2), and the case of normal_r, where |r| lies between
+// 2^(-2) and pi/4.
+//
+// Algorithm for the case of small_r
+// ---------------------------------
+//
+// For N even,
+// rsq := r * r
+// Poly1 := rsq*(P1_1 + rsq*(P1_2 + rsq*P1_3))
+// r_to_the_8 := rsq * rsq
+// r_to_the_8 := r_to_the_8 * r_to_the_8
+// Poly2 := P1_4 + rsq*(P1_5 + rsq*(P1_6 + ... rsq*P1_9))
+// CORR := c * ( 1 + rsq )
+// Poly := Poly1 + r_to_the_8*Poly2
+// Result := r*Poly + CORR
+// Result := r + Result ...in user-defined rounding
+// ...note that Poly1 and r_to_the_8 can be computed in parallel
+// ...with Poly2 (Poly1 is intentionally set to be much
+// ...shorter than Poly2 so that r_to_the_8 and CORR can be hidden)
+//
+// For N odd,
+// S_hi := -frcpa(r) ...8 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...16 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...32 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...64 bits
+// S_lo := S_hi*( (1 + S_hi*r) + S_hi*c )
+// ...S_hi + S_lo is -1/(r+c) to extra precision
+// S_lo := S_lo + Q1_1*c
+//
+// ...S_hi and S_lo are computed in parallel with
+// ...the following
+// rsq := r*r
+// P := Q1_1 + rsq*(Q1_2 + rsq*(Q1_3 + ... + rsq*Q1_7))
+//
+// Result := r*P + S_lo
+// Result := S_hi + Result ...in user-defined rounding
+//
+//
+// Algorithm for the case of normal_r
+// ----------------------------------
+//
+// Here, we first consider the computation of tan( r + c ). As
+// presented in the previous section,
+//
+// tan( r + c ) = tan(r) + c * sec^2(r)
+// = sgn_r * [ tan(B+x) + CORR ]
+// CORR = sgn_r * c * tan(B) * 1/[sin(B)*cos(B)]
+//
+// because sec^2(r) = sec^2(|r|), and B approximates |r| to 6.5 bits.
+//
+// tan( r + c ) =
+// / (1/[sin(B)*cos(B)]) * tan(x)
+// sgn_r * | tan(B) + -------------------------------- +
+// \ cot(B) - tan(x)
+// \
+// CORR |
+
+// /
+//
+// The values of tan(B), cot(B) and 1/(sin(B)*cos(B)) are
+// calculated beforehand and stored in a table. Specifically,
+// the table values are
+//
+// tan(B) as T_hi + T_lo;
+// cot(B) as C_hi + C_lo;
+// 1/[sin(B)*cos(B)] as SC_inv
+//
+// T_hi, C_hi are in double-precision memory format;
+// T_lo, C_lo are in single-precision memory format;
+// SC_inv is in extended-precision memory format.
+//
+// The value of tan(x) will be approximated by a short polynomial of
+// the form
+//
+// tan(x) as x + x * P, where
+// P = x^2 * (P2_1 + x^2 * (P2_2 + x^2 * P2_3))
+//
+// Because |x| <= 2^(-7), cot(B) - x approximates cot(B) - tan(x)
+// to a relative accuracy better than 2^(-20). Thus, a good
+// initial guess of 1/( cot(B) - tan(x) ) to initiate the iterative
+// division is:
+//
+// 1/(cot(B) - tan(x)) is approximately
+// 1/(cot(B) - x) is
+// tan(B)/(1 - x*tan(B)) is approximately
+// T_hi / ( 1 - T_hi * x ) is approximately
+//
+// T_hi * [ 1 + (T_hi * x) + (T_hi * x)^2 ]
+//
+// The calculation of tan(r+c) therefore proceeds as follows:
+//
+// Tx := T_hi * x
+// xsq := x * x
+//
+// V_hi := T_hi*(1 + Tx*(1 + Tx))
+// P := xsq * (P2_1 + xsq*(P2_2 + xsq*P2_3))
+// ...V_hi serves as an initial guess of 1/(cot(B) - tan(x))
+// ...good to about 20 bits of accuracy
+//
+// tanx := x + x*P
+// D := C_hi - tanx
+// ...D is a double precision denominator: cot(B) - tan(x)
+//
+// V_hi := V_hi + V_hi*(1 - V_hi*D)
+// ....V_hi approximates 1/(cot(B)-tan(x)) to 40 bits
+//
+// V_lo := V_hi * ( [ (1 - V_hi*C_hi) + V_hi*tanx ]
+// - V_hi*C_lo ) ...observe all order
+// ...V_hi + V_lo approximates 1/(cot(B) - tan(x))
+// ...to extra accuracy
+//
+// ... SC_inv(B) * (x + x*P)
+// ... tan(B) + ------------------------- + CORR
+// ... cot(B) - (x + x*P)
+// ...
+// ... = tan(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+//
+// Sx := SC_inv * x
+// CORR := sgn_r * c * SC_inv * T_hi
+//
+// ...put the ingredients together to compute
+// ... SC_inv(B) * (x + x*P)
+// ... tan(B) + ------------------------- + CORR
+// ... cot(B) - (x + x*P)
+// ...
+// ... = tan(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+// ... = T_hi + T_lo + CORR +
+// ... Sx * V_hi + Sx * V_lo + Sx * P *(V_hi + V_lo)
+//
+// CORR := CORR + T_lo
+// tail := V_lo + P*(V_hi + V_lo)
+// tail := Sx * tail + CORR
+// tail := Sx * V_hi + tail
+// T_hi := sgn_r * T_hi
+//
+// ...T_hi + sgn_r*tail now approximates
+// ...sgn_r*(tan(B+x) + CORR) accurately
+//
+// Result := T_hi + sgn_r*tail ...in user-defined
+// ...rounding control
+// ...It is crucial that independent paths be fully
+// ...exploited for performance's sake.
+//
+//
+// Next, we consider the computation of -cot( r + c ). As
+// presented in the previous section,
+//
+// -cot( r + c ) = -cot(r) + c * csc^2(r)
+// = sgn_r * [ -cot(B+x) + CORR ]
+// CORR = sgn_r * c * cot(B) * 1/[sin(B)*cos(B)]
+//
+// because csc^2(r) = csc^2(|r|), and B approximates |r| to 6.5 bits.
+//
+// -cot( r + c ) =
+// / (1/[sin(B)*cos(B)]) * tan(x)
+// sgn_r * | -cot(B) + -------------------------------- +
+// \ tan(B) + tan(x)
+// \
+// CORR |
+
+// /
+//
+// The values of tan(B), cot(B) and 1/(sin(B)*cos(B)) are
+// calculated beforehand and stored in a table. Specifically,
+// the table values are
+//
+// tan(B) as T_hi + T_lo;
+// cot(B) as C_hi + C_lo;
+// 1/[sin(B)*cos(B)] as SC_inv
+//
+// T_hi, C_hi are in double-precision memory format;
+// T_lo, C_lo are in single-precision memory format;
+// SC_inv is in extended-precision memory format.
+//
+// The value of tan(x) will be approximated by a short polynomial of
+// the form
+//
+// tan(x) as x + x * P, where
+// P = x^2 * (P2_1 + x^2 * (P2_2 + x^2 * P2_3))
+//
+// Because |x| <= 2^(-7), tan(B) + x approximates tan(B) + tan(x)
+// to a relative accuracy better than 2^(-18). Thus, a good
+// initial guess of 1/( tan(B) + tan(x) ) to initiate the iterative
+// division is:
+//
+// 1/(tan(B) + tan(x)) is approximately
+// 1/(tan(B) + x) is
+// cot(B)/(1 + x*cot(B)) is approximately
+// C_hi / ( 1 + C_hi * x ) is approximately
+//
+// C_hi * [ 1 - (C_hi * x) + (C_hi * x)^2 ]
+//
+// The calculation of -cot(r+c) therefore proceeds as follows:
+//
+// Cx := C_hi * x
+// xsq := x * x
+//
+// V_hi := C_hi*(1 - Cx*(1 - Cx))
+// P := xsq * (P2_1 + xsq*(P2_2 + xsq*P2_3))
+// ...V_hi serves as an initial guess of 1/(tan(B) + tan(x))
+// ...good to about 18 bits of accuracy
+//
+// tanx := x + x*P
+// D := T_hi + tanx
+// ...D is a double precision denominator: tan(B) + tan(x)
+//
+// V_hi := V_hi + V_hi*(1 - V_hi*D)
+// ....V_hi approximates 1/(tan(B)+tan(x)) to 40 bits
+//
+// V_lo := V_hi * ( [ (1 - V_hi*T_hi) - V_hi*tanx ]
+// - V_hi*T_lo ) ...observe all order
+// ...V_hi + V_lo approximates 1/(tan(B) + tan(x))
+// ...to extra accuracy
+//
+// ... SC_inv(B) * (x + x*P)
+// ... -cot(B) + ------------------------- + CORR
+// ... tan(B) + (x + x*P)
+// ...
+// ... =-cot(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+//
+// Sx := SC_inv * x
+// CORR := sgn_r * c * SC_inv * C_hi
+//
+// ...put the ingredients together to compute
+// ... SC_inv(B) * (x + x*P)
+// ... -cot(B) + ------------------------- + CORR
+// ... tan(B) + (x + x*P)
+// ...
+// ... =-cot(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+// ... =-C_hi - C_lo + CORR +
+// ... Sx * V_hi + Sx * V_lo + Sx * P *(V_hi + V_lo)
+//
+// CORR := CORR - C_lo
+// tail := V_lo + P*(V_hi + V_lo)
+// tail := Sx * tail + CORR
+// tail := Sx * V_hi + tail
+// C_hi := -sgn_r * C_hi
+//
+// ...C_hi + sgn_r*tail now approximates
+// ...sgn_r*(-cot(B+x) + CORR) accurately
+//
+// Result := C_hi + sgn_r*tail in user-defined rounding control
+// ...It is crucial that independent paths be fully
+// ...exploited for performance's sake.
+//
+// 3. Implementation Notes
+// =======================
+//
+// Table entries T_hi, T_lo; C_hi, C_lo; SC_inv
+//
+// Recall that 2^(-2) <= |r| <= pi/4;
+//
+// r = sgn_r * 2^k * 1.b_1 b_2 ... b_63
+//
+// and
+//
+// B = 2^k * 1.b_1 b_2 b_3 b_4 b_5 1
+//
+// Thus, for k = -2, possible values of B are
+//
+// B = 2^(-2) * ( 1 + index/32 + 1/64 ),
+// index ranges from 0 to 31
+//
+// For k = -1, however, since |r| <= pi/4 = 0.78...
+// possible values of B are
+//
+// B = 2^(-1) * ( 1 + index/32 + 1/64 )
+// index ranges from 0 to 19.
+//
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 128
+
+//
+// TAN_BASE_CONSTANTS: constant pool for the tan routine.
+// Every row is four data4 words (16 bytes), so row k starts at byte
+// offset 16*k from the label; e.g. the two_to_63 row is at offset 96.
+// Rows tagged with a +/- pair of names hold single-precision bit patterns.
+// NOTE(review): rows tagged with a single name appear to be extended-
+// precision values laid out as low significand word, high significand
+// word, sign/exponent word, zero pad (P1_1 decodes to roughly 1/3) --
+// confirm against the loads that consume them.
+//
+TAN_BASE_CONSTANTS:
+ASM_TYPE_DIRECTIVE(TAN_BASE_CONSTANTS,@object)
+data4 0x4B800000, 0xCB800000, 0x38800000, 0xB8800000 // two**24, -two**24
+ // two**-14, -two**-14
+data4 0x4E44152A, 0xA2F9836E, 0x00003FFE, 0x00000000 // two_by_pi
+data4 0xCE81B9F1, 0xC84D32B0, 0x00004016, 0x00000000 // P_0
+data4 0x2168C235, 0xC90FDAA2, 0x00003FFF, 0x00000000 // P_1
+data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD, 0x00000000 // P_2
+data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C, 0x00000000 // P_3
+data4 0x5F000000, 0xDF000000, 0x00000000, 0x00000000 // two_to_63, -two_to_63
+data4 0x6EC6B45A, 0xA397E504, 0x00003FE7, 0x00000000 // Inv_P_0
+data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF, 0x00000000 // d_1
+data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C, 0x00000000 // d_2
+data4 0x2168C234, 0xC90FDAA2, 0x00003FFE, 0x00000000 // PI_BY_4
+data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE, 0x00000000 // MPI_BY_4
+data4 0x3E800000, 0xBE800000, 0x00000000, 0x00000000 // two**-2, -two**-2
+data4 0x2F000000, 0xAF000000, 0x00000000, 0x00000000 // two**-33, -two**-33
+data4 0xAAAAAABD, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // P1_1
+data4 0x88882E6A, 0x88888888, 0x00003FFC, 0x00000000 // P1_2
+data4 0x0F0177B6, 0xDD0DD0DD, 0x00003FFA, 0x00000000 // P1_3
+data4 0x646B8C6D, 0xB327A440, 0x00003FF9, 0x00000000 // P1_4
+data4 0x1D5F7D20, 0x91371B25, 0x00003FF8, 0x00000000 // P1_5
+data4 0x61C67914, 0xEB69A5F1, 0x00003FF6, 0x00000000 // P1_6
+data4 0x019318D2, 0xBEDD37BE, 0x00003FF5, 0x00000000 // P1_7
+data4 0x3C794015, 0x9979B146, 0x00003FF4, 0x00000000 // P1_8
+data4 0x8C6EB58A, 0x8EBD21A3, 0x00003FF3, 0x00000000 // P1_9
+data4 0xAAAAAAB4, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // Q1_1
+data4 0x0B5FC93E, 0xB60B60B6, 0x00003FF9, 0x00000000 // Q1_2
+data4 0x0C9BBFBF, 0x8AB355E0, 0x00003FF6, 0x00000000 // Q1_3
+data4 0xCBEE3D4C, 0xDDEBBC89, 0x00003FF2, 0x00000000 // Q1_4
+data4 0x5F80BBB6, 0xB3548A68, 0x00003FEF, 0x00000000 // Q1_5
+data4 0x4CED5BF1, 0x91362560, 0x00003FEC, 0x00000000 // Q1_6
+data4 0x8EE92A83, 0xF189D95A, 0x00003FE8, 0x00000000 // Q1_7
+data4 0xAAAB362F, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // P2_1
+data4 0xE97A6097, 0x88888886, 0x00003FFC, 0x00000000 // P2_2
+data4 0x25E716A1, 0xDD108EE0, 0x00003FFA, 0x00000000 // P2_3
+//
+// Entries T_hi double-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Entries T_lo single-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+//
+data4 0x62400794, 0x3FD09BC3, 0x23A05C32, 0x00000000
+data4 0xDFFBC074, 0x3FD124A9, 0x240078B2, 0x00000000
+data4 0x5BD4920F, 0x3FD1AE23, 0x23826B8E, 0x00000000
+data4 0x15E2701D, 0x3FD23835, 0x22D31154, 0x00000000
+data4 0x63739C2D, 0x3FD2C2E4, 0x2265C9E2, 0x00000000
+data4 0xAFEEA48B, 0x3FD34E36, 0x245C05EB, 0x00000000
+data4 0x7DBB35D1, 0x3FD3DA31, 0x24749F2D, 0x00000000
+data4 0x67321619, 0x3FD466DA, 0x2462CECE, 0x00000000
+data4 0x1F94A4D5, 0x3FD4F437, 0x246D0DF1, 0x00000000
+data4 0x740C3E6D, 0x3FD5824D, 0x240A85B5, 0x00000000
+data4 0x4CB1E73D, 0x3FD61123, 0x23F96E33, 0x00000000
+data4 0xAD9EA64B, 0x3FD6A0BE, 0x247C5393, 0x00000000
+data4 0xB804FD01, 0x3FD73125, 0x241F3B29, 0x00000000
+data4 0xAB53EE83, 0x3FD7C25E, 0x2479989B, 0x00000000
+data4 0xE6640EED, 0x3FD8546F, 0x23B343BC, 0x00000000
+data4 0xE8AF1892, 0x3FD8E75F, 0x241454D1, 0x00000000
+data4 0x53928BDA, 0x3FD97B35, 0x238613D9, 0x00000000
+data4 0xEB9DE4DE, 0x3FDA0FF6, 0x22859FA7, 0x00000000
+data4 0x99ECF92D, 0x3FDAA5AB, 0x237A6D06, 0x00000000
+data4 0x6D8F1796, 0x3FDB3C5A, 0x23952F6C, 0x00000000
+data4 0x9CFB8BE4, 0x3FDBD40A, 0x2280FC95, 0x00000000
+data4 0x87943100, 0x3FDC6CC3, 0x245D2EC0, 0x00000000
+data4 0xB736C500, 0x3FDD068C, 0x23C4AD7D, 0x00000000
+data4 0xE1DDBC31, 0x3FDDA16D, 0x23D076E6, 0x00000000
+data4 0xEB515A93, 0x3FDE3D6E, 0x244809A6, 0x00000000
+data4 0xE6E9E5F1, 0x3FDEDA97, 0x220856C8, 0x00000000
+data4 0x1963CE69, 0x3FDF78F1, 0x244BE993, 0x00000000
+data4 0x7D635BCE, 0x3FE00C41, 0x23D21799, 0x00000000
+data4 0x1C302CD3, 0x3FE05CAB, 0x248A1B1D, 0x00000000
+data4 0xDB6A1FA0, 0x3FE0ADB9, 0x23D53E33, 0x00000000
+data4 0x4A20BA81, 0x3FE0FF72, 0x24DB9ED5, 0x00000000
+data4 0x153FA6F5, 0x3FE151D9, 0x24E9E451, 0x00000000
+//
+// Entries T_hi double-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Entries T_lo single-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+//
+data4 0xBA1BE39E, 0x3FE1CEC4, 0x24B60F9E, 0x00000000
+data4 0x5ABD9B2D, 0x3FE277E4, 0x248C2474, 0x00000000
+data4 0x0272B110, 0x3FE32418, 0x247B8311, 0x00000000
+data4 0x890E2DF0, 0x3FE3D38B, 0x24C55751, 0x00000000
+data4 0x46236871, 0x3FE4866D, 0x24E5BC34, 0x00000000
+data4 0x45E044B0, 0x3FE53CEE, 0x24001BA4, 0x00000000
+data4 0x82EC06E4, 0x3FE5F742, 0x24B973DC, 0x00000000
+data4 0x25DF43F9, 0x3FE6B5A1, 0x24895440, 0x00000000
+data4 0xCAFD348C, 0x3FE77844, 0x240021CA, 0x00000000
+data4 0xCEED6B92, 0x3FE83F6B, 0x24C45372, 0x00000000
+data4 0xA34F3665, 0x3FE90B58, 0x240DAD33, 0x00000000
+data4 0x2C1E56B4, 0x3FE9DC52, 0x24F846CE, 0x00000000
+data4 0x27041578, 0x3FEAB2A4, 0x2323FB6E, 0x00000000
+data4 0x9DD8C373, 0x3FEB8E9F, 0x24B3090B, 0x00000000
+data4 0x65C9AA7B, 0x3FEC709B, 0x2449F611, 0x00000000
+data4 0xACCF8435, 0x3FED58F4, 0x23616A7E, 0x00000000
+data4 0x97635082, 0x3FEE480F, 0x24C2FEAE, 0x00000000
+data4 0xF0ACC544, 0x3FEF3E57, 0x242CE964, 0x00000000
+data4 0xF7E06E4B, 0x3FF01E20, 0x2480D3EE, 0x00000000
+data4 0x8A798A69, 0x3FF0A125, 0x24DB8967, 0x00000000
+//
+// Entries C_hi double-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Entries C_lo single-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+//
+data4 0xE63EFBD0, 0x400ED3E2, 0x259D94D4, 0x00000000
+data4 0xC515DAB5, 0x400DDDB4, 0x245F0537, 0x00000000
+data4 0xBE19A79F, 0x400CF57A, 0x25D4EA9F, 0x00000000
+data4 0xD15298ED, 0x400C1A06, 0x24AE40A0, 0x00000000
+data4 0x164B2708, 0x400B4A4C, 0x25A5AAB6, 0x00000000
+data4 0x5285B068, 0x400A855A, 0x25524F18, 0x00000000
+data4 0x3FFA549F, 0x4009CA5A, 0x24C999C0, 0x00000000
+data4 0x646AF623, 0x4009188A, 0x254FD801, 0x00000000
+data4 0x6084D0E7, 0x40086F3C, 0x2560F5FD, 0x00000000
+data4 0xA29A76EE, 0x4007CDD2, 0x255B9D19, 0x00000000
+data4 0x6C8ECA95, 0x400733BE, 0x25CB021B, 0x00000000
+data4 0x1F8DDC52, 0x4006A07E, 0x24AB4722, 0x00000000
+data4 0xC298AD58, 0x4006139B, 0x252764E2, 0x00000000
+data4 0xBAD7164B, 0x40058CAB, 0x24DAF5DB, 0x00000000
+data4 0xAE31A5D3, 0x40050B4B, 0x25EA20F4, 0x00000000
+data4 0x89F85A8A, 0x40048F21, 0x2583A3E8, 0x00000000
+data4 0xA862380D, 0x400417DA, 0x25DCC4CC, 0x00000000
+data4 0x1088FCFE, 0x4003A52B, 0x2430A492, 0x00000000
+data4 0xCD3527D5, 0x400336CC, 0x255F77CF, 0x00000000
+data4 0x5760766D, 0x4002CC7F, 0x25DA0BDA, 0x00000000
+data4 0x11CE02E3, 0x40026607, 0x256FF4A2, 0x00000000
+data4 0xD37BBE04, 0x4002032C, 0x25208AED, 0x00000000
+data4 0x7F050775, 0x4001A3BD, 0x24B72DD6, 0x00000000
+data4 0xA554848A, 0x40014789, 0x24AB4DAA, 0x00000000
+data4 0x323E81B7, 0x4000EE65, 0x2584C440, 0x00000000
+data4 0x21CF1293, 0x40009827, 0x25C9428D, 0x00000000
+data4 0x3D415EEB, 0x400044A9, 0x25DC8482, 0x00000000
+data4 0xBD72C577, 0x3FFFE78F, 0x257F5070, 0x00000000
+data4 0x75EFD28E, 0x3FFF4AC3, 0x23EBBF7A, 0x00000000
+data4 0x60B52DDE, 0x3FFEB2AF, 0x22EECA07, 0x00000000
+data4 0x35204180, 0x3FFE1F19, 0x24191079, 0x00000000
+data4 0x54F7E60A, 0x3FFD8FCA, 0x248D3058, 0x00000000
+//
+// Entries C_hi double-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Entries C_lo single-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+//
+data4 0x79F6FADE, 0x3FFCC06A, 0x239C7886, 0x00000000
+data4 0x891662A6, 0x3FFBB91F, 0x250BD191, 0x00000000
+data4 0x529F155D, 0x3FFABFB6, 0x256CC3E6, 0x00000000
+data4 0x2E964AE9, 0x3FF9D300, 0x250843E3, 0x00000000
+data4 0x89DCB383, 0x3FF8F1EF, 0x2277C87E, 0x00000000
+data4 0x7C87DBD6, 0x3FF81B93, 0x256DA6CF, 0x00000000
+data4 0x1042EDE4, 0x3FF74F14, 0x2573D28A, 0x00000000
+data4 0x1784B360, 0x3FF68BAF, 0x242E489A, 0x00000000
+data4 0x7C923C4C, 0x3FF5D0B5, 0x2532D940, 0x00000000
+data4 0xF418EF20, 0x3FF51D88, 0x253C7DD6, 0x00000000
+data4 0x02F88DAE, 0x3FF4719A, 0x23DB59BF, 0x00000000
+data4 0x49DA0788, 0x3FF3CC66, 0x252B4756, 0x00000000
+data4 0x0B980DB8, 0x3FF32D77, 0x23FE585F, 0x00000000
+data4 0xE56C987A, 0x3FF2945F, 0x25378A63, 0x00000000
+data4 0xB16523F6, 0x3FF200BD, 0x247BB2E0, 0x00000000
+data4 0x8CE27778, 0x3FF17235, 0x24446538, 0x00000000
+data4 0xFDEFE692, 0x3FF0E873, 0x2514638F, 0x00000000
+data4 0x33154062, 0x3FF0632C, 0x24A7FC27, 0x00000000
+data4 0xB3EF115F, 0x3FEFC42E, 0x248FD0FE, 0x00000000
+data4 0x135D26F6, 0x3FEEC9E8, 0x2385C719, 0x00000000
+//
+// Entries SC_inv in Swapped IEEE format (extended)
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+//
+data4 0x1BF30C9E, 0x839D6D4A, 0x00004001, 0x00000000
+data4 0x554B0EB0, 0x80092804, 0x00004001, 0x00000000
+data4 0xA1CF0DE9, 0xF959F94C, 0x00004000, 0x00000000
+data4 0x77378677, 0xF3086BA0, 0x00004000, 0x00000000
+data4 0xCCD4723C, 0xED154515, 0x00004000, 0x00000000
+data4 0x1C27CF25, 0xE7790944, 0x00004000, 0x00000000
+data4 0x8DDACB88, 0xE22D037D, 0x00004000, 0x00000000
+data4 0x89C73522, 0xDD2B2D8A, 0x00004000, 0x00000000
+data4 0xBB2C1171, 0xD86E1A23, 0x00004000, 0x00000000
+data4 0xDFF5E0F9, 0xD3F0E288, 0x00004000, 0x00000000
+data4 0x283BEBD5, 0xCFAF16B1, 0x00004000, 0x00000000
+data4 0x0D88DD53, 0xCBA4AFAA, 0x00004000, 0x00000000
+data4 0xCA67C43D, 0xC7CE03CC, 0x00004000, 0x00000000
+data4 0x0CA0DDB0, 0xC427BC82, 0x00004000, 0x00000000
+data4 0xF13D8CAB, 0xC0AECD57, 0x00004000, 0x00000000
+data4 0x71ECE6B1, 0xBD606C38, 0x00004000, 0x00000000
+data4 0xA44C4929, 0xBA3A0A96, 0x00004000, 0x00000000
+data4 0xE5CCCEC1, 0xB7394F6F, 0x00004000, 0x00000000
+data4 0x9637D8BC, 0xB45C1203, 0x00004000, 0x00000000
+data4 0x92CB051B, 0xB1A05528, 0x00004000, 0x00000000
+data4 0x6BA2FFD0, 0xAF04432B, 0x00004000, 0x00000000
+data4 0x7221235F, 0xAC862A23, 0x00004000, 0x00000000
+data4 0x5F00A9D1, 0xAA2478AF, 0x00004000, 0x00000000
+data4 0x81E082BF, 0xA7DDBB0C, 0x00004000, 0x00000000
+data4 0x45684FEE, 0xA5B0987D, 0x00004000, 0x00000000
+data4 0x627A8F53, 0xA39BD0F5, 0x00004000, 0x00000000
+data4 0x6EC5C8B0, 0xA19E3B03, 0x00004000, 0x00000000
+data4 0x91CD7C66, 0x9FB6C1F0, 0x00004000, 0x00000000
+data4 0x1FA3DF8A, 0x9DE46410, 0x00004000, 0x00000000
+data4 0xA8F6B888, 0x9C263139, 0x00004000, 0x00000000
+data4 0xC27B0450, 0x9A7B4968, 0x00004000, 0x00000000
+data4 0x5EE614EE, 0x98E2DB7E, 0x00004000, 0x00000000
+//
+// Entries SC_inv in Swapped IEEE format (extended)
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+//
+data4 0x13B2B5BA, 0x969F335C, 0x00004000, 0x00000000
+data4 0xD4C0F548, 0x93D446D9, 0x00004000, 0x00000000
+data4 0x61B798AF, 0x9147094F, 0x00004000, 0x00000000
+data4 0x758787AC, 0x8EF317CC, 0x00004000, 0x00000000
+data4 0xB99EEFDB, 0x8CD498B3, 0x00004000, 0x00000000
+data4 0xDFF8BC37, 0x8AE82A7D, 0x00004000, 0x00000000
+data4 0xE3C55D42, 0x892AD546, 0x00004000, 0x00000000
+data4 0xD15573C1, 0x8799FEA9, 0x00004000, 0x00000000
+data4 0x435A4B4C, 0x86335F88, 0x00004000, 0x00000000
+data4 0x3E93A87B, 0x84F4FB6E, 0x00004000, 0x00000000
+data4 0x80A382FB, 0x83DD1952, 0x00004000, 0x00000000
+data4 0xA4CB8C9E, 0x82EA3D7F, 0x00004000, 0x00000000
+data4 0x6861D0A8, 0x821B247C, 0x00004000, 0x00000000
+data4 0x63E8D244, 0x816EBED1, 0x00004000, 0x00000000
+data4 0x27E4CFC6, 0x80E42D91, 0x00004000, 0x00000000
+data4 0x28E64AFD, 0x807ABF8D, 0x00004000, 0x00000000
+data4 0x863B4FD8, 0x8031EF26, 0x00004000, 0x00000000
+data4 0xAE8C11FD, 0x800960AD, 0x00004000, 0x00000000
+data4 0x5FDBEC21, 0x8000E147, 0x00004000, 0x00000000
+data4 0xA07791FA, 0x80186650, 0x00004000, 0x00000000
+
+//
+// Symbolic names for the floating-point and general registers used by
+// __libm_tan.  Several names deliberately or accidentally share one
+// register; those cases are called out below.
+//
+// Arg and Result are both f8: the input and the return value share it.
+Arg = f8
+Result = f8
+fp_tmp = f9
+U_2 = f10
+rsq = f11
+C_hi = f12
+C_lo = f13
+T_hi = f14
+T_lo = f15
+
+N_0 = f32
+d_1 = f33
+MPI_BY_4 = f34
+tail = f35
+tanx = f36
+Cx = f37
+Sx = f38
+sgn_r = f39
+CORR = f40
+P = f41
+D = f42
+ArgPrime = f43
+P_0 = f44
+
+// P2_1..P2_3 and P1_1..P1_3 name the same three registers (f45-f47).
+P2_1 = f45
+P2_2 = f46
+P2_3 = f47
+
+P1_1 = f45
+P1_2 = f46
+P1_3 = f47
+
+P1_4 = f48
+P1_5 = f49
+P1_6 = f50
+P1_7 = f51
+P1_8 = f52
+P1_9 = f53
+
+TWO_TO_63 = f54
+NEGTWO_TO_63 = f55
+x = f56
+xsq = f57
+Tx = f58
+Tx1 = f59
+Set = f60
+poly1 = f61
+poly2 = f62
+Poly = f63
+Poly1 = f64
+Poly2 = f65
+r_to_the_8 = f66
+B = f67
+SC_inv = f68
+Pos_r = f69
+N_0_fix = f70
+PI_BY_4 = f71
+NEGTWO_TO_NEG2 = f72
+TWO_TO_24 = f73
+TWO_TO_NEG14 = f74
+TWO_TO_NEG33 = f75
+// NOTE(review): NEGTWO_TO_24 and NEGTWO_TO_NEG14 are both f76, whereas
+// every neighbouring constant has its own register.  Confirm that the
+// two values are never live at the same time before relying on either.
+NEGTWO_TO_24 = f76
+NEGTWO_TO_NEG14 = f76
+NEGTWO_TO_NEG33 = f77
+two_by_PI = f78
+N = f79
+N_fix = f80
+P_1 = f81
+P_2 = f82
+P_3 = f83
+s_val = f84
+w = f85
+c = f86
+r = f87
+Z = f88
+A = f89
+a = f90
+t = f91
+U_1 = f92
+d_2 = f93
+TWO_TO_NEG2 = f94
+Q1_1 = f95
+Q1_2 = f96
+Q1_3 = f97
+Q1_4 = f98
+Q1_5 = f99
+Q1_6 = f100
+Q1_7 = f101
+// NOTE(review): the TAN_BASE_CONSTANTS table only lists Q1_1..Q1_7;
+// no Q1_8 row is visible there.
+Q1_8 = f102
+S_hi = f103
+S_lo = f104
+V_hi = f105
+V_lo = f106
+U_hi = f107
+U_lo = f108
+U_hiabs = f109
+V_hiabs = f110
+V = f111
+Inv_P_0 = f112
+
+// Save slots for b0, gp and ar.pfs (by name).
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_SAVE_PFS = r35
+
+delta1 = r36
+table_ptr1 = r37
+table_ptr2 = r38
+i_0 = r39
+i_1 = r40
+N_fix_gr = r41
+N_inc = r42
+exp_Arg = r43
+exp_r = r44
+sig_r = r45
+lookup = r46
+table_offset = r47
+Create_B = r48
+// gr_tmp shares r49 with GR_Parameter_X below.
+gr_tmp = r49
+
+// r49-r50 carry the arguments passed to the pi_by_2 reduce routine.
+GR_Parameter_X = r49
+GR_Parameter_r = r50
+
+
+
+.global __libm_tan
+.section .text
+.proc __libm_tan
+
+
+__libm_tan:
+
+{ .mfi
+alloc r32 = ar.pfs, 0,17,2,0
+(p0) fclass.m.unc p6,p0 = Arg, 0x1E7
+ addl gr_tmp = -1,r0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p7,p0 = Arg, 0x1FF
+ nop.i 999
+}
+;;
+
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 table_ptr1 = [table_ptr1]
+ setf.sig fp_tmp = gr_tmp // Make a constant so fmpy produces inexact
+ nop.i 999
+}
+;;
+
+//
+// Check for NatVals, Infs , NaNs, and Zeros
+// Check for everything - if false, then must be pseudo-zero
+// or pseudo-nan.
+// Local table pointer
+//
+
+{ .mbb
+(p0) add table_ptr2 = 96, table_ptr1
+(p6) br.cond.spnt __libm_TAN_SPECIAL
+(p7) br.cond.spnt __libm_TAN_SPECIAL ;;
+}
+//
+// Point to Inv_P_0
+// Branch out to deal with unsupporteds and special values.
+//
+
+{ .mmf
+(p0) ldfs TWO_TO_24 = [table_ptr1],4
+(p0) ldfs TWO_TO_63 = [table_ptr2],4
+//
+// Load -2**24, load -2**63.
+//
+(p0) fcmp.eq.s0 p0, p6 = Arg, f1 ;;
+}
+
+{ .mfi
+(p0) ldfs NEGTWO_TO_63 = [table_ptr2],12
+(p0) fnorm.s1 Arg = Arg
+ nop.i 999
+}
+//
+// Load 2**24, Load 2**63.
+//
+
+{ .mmi
+(p0) ldfs NEGTWO_TO_24 = [table_ptr1],12 ;;
+//
+// Do fcmp to generate Denormal exception
+// - can't do FNORM (will generate Underflow when U is unmasked!)
+// Normalize input argument.
+//
+(p0) ldfe two_by_PI = [table_ptr1],16
+ nop.i 999
+}
+
+{ .mmi
+(p0) ldfe Inv_P_0 = [table_ptr2],16 ;;
+(p0) ldfe d_1 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Decide about the paths to take:
+// PR_1 and PR_3 set if -2**24 < Arg < 2**24 - CASE 1 OR 2
+// OTHERWISE - CASE 3 OR 4
+// Load inverse of P_0 .
+// Set PR_6 if Arg <= -2**63
+// Are there any Infs, NaNs, or zeros?
+//
+
+{ .mmi
+(p0) ldfe P_0 = [table_ptr1],16 ;;
+(p0) ldfe d_2 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Set PR_8 if Arg <= -2**24
+// Set PR_6 if Arg >= 2**63
+//
+
+{ .mmi
+(p0) ldfe P_1 = [table_ptr1],16 ;;
+(p0) ldfe PI_BY_4 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Set PR_8 if Arg >= 2**24
+//
+
+{ .mmi
+(p0) ldfe P_2 = [table_ptr1],16 ;;
+(p0) ldfe MPI_BY_4 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Load P_2 and PI_BY_4
+//
+
+{ .mfi
+(p0) ldfe P_3 = [table_ptr1],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.le.unc.s1 p6,p7 = Arg,NEGTWO_TO_63
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.le.unc.s1 p8,p9 = Arg,NEGTWO_TO_24
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fcmp.ge.s1 p6,p0 = Arg,TWO_TO_63
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p9) fcmp.ge.s1 p8,p0 = Arg,TWO_TO_24
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Load P_3 and -PI_BY_4
+//
+(p6) br.cond.spnt TAN_ARG_TOO_LARGE ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Load 2**(-2).
+// Load -2**(-2).
+// Branch out if we have a special argument.
+// Branch out if the magnitude of the input argument is too large
+// - do this branch before the next.
+//
+(p8) br.cond.spnt TAN_LARGER_ARG ;;
+}
+//
+// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
+//
+
+{ .mfi
+(p0) ldfs TWO_TO_NEG2 = [table_ptr2],4
+// ARGUMENT REDUCTION CODE - CASE 1 and 2
+// Load 2**(-2).
+// Load -2**(-2).
+(p0) fmpy.s1 N = Arg,two_by_PI
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr2],12
+//
+// N = Arg * 2/pi
+//
+(p0) fcmp.lt.unc.s1 p8,p9= Arg,PI_BY_4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if Arg < pi/4, set PR_8.
+//
+(p8) fcmp.gt.s1 p8,p9= Arg,MPI_BY_4
+ nop.i 999 ;;
+}
+//
+// Case 1: Is |r| < 2**(-2).
+// Arg is the same as r in this case.
+// r = Arg
+// c = 0
+//
+
+{ .mfi
+(p8) mov N_fix_gr = r0
+//
+// if Arg > -pi/4, reset PR_8.
+// Select the case when |Arg| < pi/4 - set PR[8] = true.
+// Else Select the case when |Arg| >= pi/4 - set PR[9] = true.
+//
+(p0) fcvt.fx.s1 N_fix = N
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Grab the integer part of N .
+//
+(p8) mov r = Arg
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p8) mov c = f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.lt.unc.s1 p10, p11 = Arg, TWO_TO_NEG2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fcmp.gt.s1 p10,p0 = Arg, NEGTWO_TO_NEG2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 2: Place integer part of N in GP register.
+//
+(p9) fcvt.xf N = N_fix
+ nop.i 999 ;;
+}
+
+{ .mib
+(p9) getf.sig N_fix_gr = N_fix
+ nop.i 999
+//
+// Case 2: Convert integer N_fix back to normalized floating-point value.
+//
+(p10) br.cond.spnt TAN_SMALL_R ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.sptk TAN_NORMAL_R ;;
+}
+//
+// Case 1: PR_3 is only affected when PR_1 is set.
+//
+
+{ .mmi
+(p9) ldfs TWO_TO_NEG33 = [table_ptr2], 4 ;;
+//
+// Case 2: Load 2**(-33).
+//
+(p9) ldfs NEGTWO_TO_NEG33 = [table_ptr2], 4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 2: Load -2**(-33).
+//
+(p9) fnma.s1 s_val = N, P_1, Arg
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p9) fmpy.s1 w = N, P_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 2: w = N * P_2
+// Case 2: s_val = -N * P_1 + Arg
+//
+(p0) fcmp.lt.unc.s1 p9,p8 = s_val, TWO_TO_NEG33
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Decide between case_1 and case_2 reduce:
+//
+(p9) fcmp.gt.s1 p9, p8 = s_val, NEGTWO_TO_NEG33
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: s <= -2**(-33) or s >= 2**(-33)
+// Case 2_reduce: -2**(-33) < s < 2**(-33)
+//
+(p8) fsub.s1 r = s_val, w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p9) fmpy.s1 w = N, P_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 U_1 = N, P_2, w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: Is |r| < 2**(-2), if so set PR_10
+// else set PR_11.
+//
+(p8) fsub.s1 c = s_val, r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: r = s + w (change sign)
+// Case 2_reduce: w = N * P_3 (change sign)
+//
+(p8) fcmp.lt.unc.s1 p10, p11 = r, TWO_TO_NEG2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fcmp.gt.s1 p10, p11 = r, NEGTWO_TO_NEG2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 r = s_val, U_1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: c is complete here.
+// c = c + w (w has not been negated.)
+// Case 2_reduce: r is complete here - continue to calculate c .
+// r = s - U_1
+//
+(p9) fms.s1 U_2 = N, P_2, U_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: c = s - r
+// Case 2_reduce: U_1 = N * P_2 + w
+//
+(p8) fsub.s1 c = c, w
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 s_val = s_val, r
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// Case 2_reduce:
+// U_2 = N * P_2 - U_1
+// Not needed until later.
+//
+(p9) fadd.s1 U_2 = U_2, w
+//
+// Case 2_reduce:
+// s = s - r
+// U_2 = U_2 + w
+//
+(p10) br.cond.spnt TAN_SMALL_R ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p11) br.cond.sptk TAN_NORMAL_R ;;
+}
+
+{ .mii
+ nop.m 999
+//
+// Case 2_reduce: only this case falls through to here; Case 1_reduce
+// has branched to TAN_SMALL_R or TAN_NORMAL_R above.
+// Get i_1 = lsb of N_fix_gr and test its parity:
+// if i_1 == 0 (N even), set p11, else (N odd) set p12.
+//
+(p9) extr.u i_1 = N_fix_gr, 0, 1 ;;
+(p9) cmp.eq.unc p11, p12 = 0x0000,i_1 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = r * r
+//
+// This used to be "rsq = r, Z". Z (f88) is not written by any
+// instruction between the entry of __libm_tan and this point, so the
+// product was formed with whatever value the caller left in f88.
+// The later steps compute Result = r + r * (rsq * P1_1) + c, which
+// needs rsq = r * r; the Case 4 path and TAN_SMALL_R form it the same way.
+//
+(p11) fmpy.s1 rsq = r, r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: S_hi = frcpa(r)
+//
+(p12) frcpa.s1 S_hi,p0 = f1, r
+ nop.i 999
+}
+
+//
+// Case 1: Branch to SMALL_R or NORMAL_R.
+// Case 1 is done now.
+//
+
+{ .mfi
+//
+// Case 2_reduce:
+// Reload the base address of the constant table.
+// c = s - U_1
+//
+(p9) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
+(p9) fsub.s1 c = s_val, U_1
+ nop.i 999 ;;
+}
+;;
+
+{ .mmi
+(p9) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p9) add table_ptr1 = 224, table_ptr1 ;;
+(p9) ldfe P1_1 = [table_ptr1],144
+ nop.i 999 ;;
+}
+//
+// Load P1_1 and point to Q1_1 .
+//
+
+{ .mfi
+(p9) ldfe Q1_1 = [table_ptr1] , 0
+//
+// N odd: Change sign of S_hi
+//
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 2_reduce:
+// c = s - U_1
+//
+(p9) fsub.s1 c = c, U_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: Change sign of S_hi
+//
+(p11) fmpy.s1 rsq = rsq, P1_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = rsq * P1_1
+// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary
+//
+(p11) fma.s1 Result = r, rsq, c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = c + r * rsq
+// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = Result + r
+// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
+//
+(p11) fadd.s0 Result = r, Result
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result1 = Result + r
+// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0 64 bits partial
+//
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * poly + 1.0 64 bits
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0
+//
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * c + poly1
+//
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = S_hi * poly1
+//
+(p12) fma.s1 S_lo = Q1_1, r, S_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: Result = S_hi + S_lo
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// N odd: S_lo = S_lo + Q1_1 * r
+//
+(p12) fadd.s0 Result = S_hi, S_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+TAN_LARGER_ARG:
+
+{ .mmf
+(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
+ nop.m 999
+(p0) fmpy.s1 N_0 = Arg, Inv_P_0
+}
+;;
+
+//
+// ARGUMENT REDUCTION CODE - CASE 3 and 4
+//
+//
+// Adjust table_ptr1 to beginning of table.
+// N_0 = Arg * Inv_P_0
+//
+
+
+{ .mmi
+(p0) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p0) add table_ptr1 = 8, table_ptr1 ;;
+//
+// Point to 2**(-14)
+//
+(p0) ldfs TWO_TO_NEG14 = [table_ptr1], 4
+ nop.i 999 ;;
+}
+//
+// Load 2**(-14).
+//
+
+{ .mmi
+(p0) ldfs NEGTWO_TO_NEG14 = [table_ptr1], 180 ;;
+//
+// N_0_fix = integer part of N_0 .
+// Adjust table_ptr1 to beginning of table.
+//
+(p0) ldfs TWO_TO_NEG2 = [table_ptr1], 4
+ nop.i 999 ;;
+}
+//
+// Make N_0 the integer part.
+//
+
+{ .mfi
+(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr1]
+//
+// Load -2**(-14).
+//
+(p0) fcvt.fx.s1 N_0_fix = N_0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf N_0 = N_0_fix
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 ArgPrime = N_0, P_0, Arg
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 w = N_0, d_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// ArgPrime = -N_0 * P_0 + Arg
+// w = N_0 * d_1
+//
+(p0) fmpy.s1 N = ArgPrime, two_by_PI
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N = ArgPrime * 2/pi
+//
+(p0) fcvt.fx.s1 N_fix = N
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N_fix is the integer part.
+//
+(p0) fcvt.xf N = N_fix
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig N_fix_gr = N_fix
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N is the integer part of the reduced-reduced argument.
+// Put the integer in a GP register.
+//
+(p0) fnma.s1 s_val = N, P_1, ArgPrime
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 w = N, P_2, w
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// s_val = -N*P_1 + ArgPrime
+// w = -N*P_2 + w
+//
+(p0) fcmp.lt.unc.s1 p11, p10 = s_val, TWO_TO_NEG14
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 3: r = s_val + w (Z complete)
+// Case 4: U_hi = N_0 * d_1
+//
+(p10) fmpy.s1 V_hi = N, P_2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 U_hi = N_0, d_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 3: r = s_val + w (Z complete)
+// Case 4: U_hi = N_0 * d_1
+//
+(p11) fmpy.s1 V_hi = N, P_2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 U_hi = N_0, d_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Decide between case 3 and 4:
+// Case 3: s <= -2**(-14) or s >= 2**(-14)
+// Case 4: -2**(-14) < s < 2**(-14)
+//
+(p10) fadd.s1 r = s_val, w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 w = N, P_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: We need abs of both U_hi and V_hi - don't
+// worry about switched sign of V_hi .
+//
+(p11) fsub.s1 A = U_hi, V_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: A = U_hi + V_hi
+// Note: Worry about switched sign of V_hi, so subtract instead of add.
+//
+(p11) fnma.s1 V_lo = N, P_2, V_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fms.s1 U_lo = N_0, d_1, U_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fabs V_hiabs = V_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: V_hi = N * P_2
+// w = N * P_3
+// Note the product does not include the (-) as in the writeup
+// so (-) missing for V_hi and w .
+(p10) fadd.s1 r = s_val, w
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 3: c = s_val - r
+// Case 4: U_lo = N_0 * d_1 - U_hi
+//
+(p11) fabs U_hiabs = U_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 w = N, P_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: Set P_12 if U_hiabs >= V_hiabs
+//
+(p11) fadd.s1 C_hi = s_val, A
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: C_hi = s_val + A
+//
+(p11) fadd.s1 t = U_lo, V_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 3: Is |r| < 2**(-2), if so set PR_7
+// else set PR_8.
+// Case 3: If PR_7 is set, prepare to branch to Small_R.
+// Case 3: If PR_8 is set, prepare to branch to Normal_R.
+//
+(p10) fsub.s1 c = s_val, r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 3: c = (s - r) + w (c complete)
+//
+(p11) fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fms.s1 w = N_0, d_2, w
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: V_hi = N * P_2
+// w = N * P_3
+// Note the product does not include the (-) as in the writeup
+// so (-) missing for V_hi and w .
+//
+(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
+// Note: the (-) is still missing for V_hi .
+// Case 4: w = w + N_0 * d_2
+// Note: the (-) is now incorporated in w .
+//
+(p10) fadd.s1 c = c, w
+//
+// Case 4: t = U_lo + V_lo
+// Note: remember V_lo should be (-), subtract instead of add. NO
+//
+(p14) br.cond.spnt TAN_SMALL_R ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p15) br.cond.spnt TAN_NORMAL_R ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 3: Vector off when |r| < 2**(-2). Recall that PR_3 will be true.
+// The remaining stuff is for Case 4.
+//
+(p12) fsub.s1 a = U_hi, A
+(p11) extr.u i_1 = N_fix_gr, 0, 1 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: C_lo = s_val - C_hi
+//
+(p11) fadd.s1 t = t, w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 a = V_hi, A
+ nop.i 999 ;;
+}
+
+//
+// Case 4: a = U_hi - A
+// a = V_hi - A (do an add to account for missing (-) on V_hi)
+//
+
+{ .mfi
+(p11) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
+(p11) fsub.s1 C_lo = s_val, C_hi
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p11) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+//
+// Case 4: a = (U_hi - A) + V_hi
+// a = (V_hi - A) + U_hi
+// In each case account for negative missing from V_hi .
+//
+//
+// Case 4: C_lo = (s_val - C_hi) + A
+//
+
+{ .mmi
+(p11) add table_ptr1 = 224, table_ptr1 ;;
+(p11) ldfe P1_1 = [table_ptr1], 16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p11) ldfe P1_2 = [table_ptr1], 128
+//
+// Case 4: w = U_lo + V_lo + w
+//
+(p12) fsub.s1 a = a, V_hi
+ nop.i 999 ;;
+}
+//
+// Case 4: r = C_hi + C_lo
+//
+
+{ .mfi
+(p11) ldfe Q1_1 = [table_ptr1], 16
+(p11) fadd.s1 C_lo = C_lo, A
+ nop.i 999 ;;
+}
+//
+// Case 4: c = C_hi - r
+// Get [i_1] - lsb of N_fix_gr.
+//
+
+{ .mfi
+(p11) ldfe Q1_2 = [table_ptr1], 16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fsub.s1 a = U_hi, a
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 t = t, a
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: t = t + a
+//
+(p11) fadd.s1 C_lo = C_lo, t
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: C_lo = C_lo + t
+//
+(p11) fadd.s1 r = C_hi, C_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fsub.s1 c = C_hi, r
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Case 4: c = c + C_lo finished.
+// Is i_1 even or odd?
+// if i_1 == 0, set PR_4, else set PR_5.
+//
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+(p0) fmpy.s1 rsq = r, r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 c = c , C_lo
+(p11) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s1 S_hi, p0 = f1, r
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: Change sign of S_hi
+//
+(p11) fma.s1 Result = rsq, P1_2, P1_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 P = rsq, Q1_2, Q1_1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: Result = S_hi + S_lo (User supplied rounding mode for C1)
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = r * r
+// N odd: S_hi = frcpa(r)
+//
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = rsq * P1_2 + P1_1
+// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary
+//
+(p11) fmpy.s1 Result = rsq, Result
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r,f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = Result * rsq
+// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary
+//
+(p11) fma.s1 Result = r, Result, c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
+//
+(p11) fadd.s0 Result= r, Result
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = Result * r + c
+// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
+//
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result1 = Result + r (Rounding mode S0)
+// N odd: poly1 = S_hi * r + 1.0 64 bits partial
+//
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * poly + S_hi 64 bits
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0
+//
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * c + poly1
+//
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = S_hi * poly1
+//
+(p12) fma.s1 S_lo = P, r, S_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// N odd: S_lo = S_lo + r * P
+//
+(p12) fadd.s0 Result = S_hi, S_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+TAN_SMALL_R:
+
+{ .mii
+ nop.m 999
+(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;
+(p0) cmp.eq.unc p11, p12 = 0x0000, i_1
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 rsq = r, r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s1 S_hi, p0 = f1, r
+ nop.i 999
+}
+
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p0) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+// *****************************************************************
+// *****************************************************************
+// *****************************************************************
+
+{ .mmi
+(p0) add table_ptr1 = 224, table_ptr1 ;;
+(p0) ldfe P1_1 = [table_ptr1], 16
+ nop.i 999 ;;
+}
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+// |r| < 2**(-2)
+
+{ .mfi
+(p0) ldfe P1_2 = [table_ptr1], 16
+(p11) fmpy.s1 r_to_the_8 = rsq, rsq
+ nop.i 999 ;;
+}
+//
+// Set table_ptr1 to beginning of constant table.
+// Get [i_1] - lsb of N_fix_gr.
+//
+
+{ .mfi
+(p0) ldfe P1_3 = [table_ptr1], 96
+//
+// N even: rsq = r * r
+// N odd: S_hi = frcpa(r)
+//
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999 ;;
+}
+//
+// Is i_1 even or odd?
+// if i_1 == 0, set PR_11.
+// if i_1 != 0, set PR_12.
+//
+
+{ .mfi
+(p11) ldfe P1_9 = [table_ptr1], -16
+//
+// N even: Poly2 = P1_7 + Poly2 * rsq
+// N odd: poly2 = Q1_5 + poly2 * rsq
+//
+(p11) fadd.s1 CORR = rsq, f1
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p11) ldfe P1_8 = [table_ptr1], -16 ;;
+//
+// N even: Poly1 = P1_2 + P1_3 * rsq
+// N odd: poly1 = 1.0 + S_hi * r
+// 16 bits partial account for necessary (-1)
+//
+(p11) ldfe P1_7 = [table_ptr1], -16
+ nop.i 999 ;;
+}
+//
+// N even: Poly1 = P1_1 + Poly1 * rsq
+// N odd: S_hi = S_hi + S_hi * poly1) 16 bits account for necessary
+//
+
+{ .mfi
+(p11) ldfe P1_6 = [table_ptr1], -16
+//
+// N even: Poly2 = P1_5 + Poly2 * rsq
+// N odd: poly2 = Q1_3 + poly2 * rsq
+//
+(p11) fmpy.s1 r_to_the_8 = r_to_the_8, r_to_the_8
+ nop.i 999 ;;
+}
+//
+// N even: Poly1 = Poly1 * rsq
+// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
+//
+
+{ .mfi
+(p11) ldfe P1_5 = [table_ptr1], -16
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+//
+// N even: CORR = CORR * c
+// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
+//
+
+//
+// N even: Poly2 = P1_6 + Poly2 * rsq
+// N odd: poly2 = Q1_4 + poly2 * rsq
+//
+{ .mmf
+(p0) addl table_ptr2 = @ltoff(TAN_BASE_CONSTANTS), gp
+(p11) ldfe P1_4 = [table_ptr1], -16
+(p11) fmpy.s1 CORR = CORR, c
+}
+;;
+
+
+{ .mmi
+(p0) ld8 table_ptr2 = [table_ptr2]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mii
+(p0) add table_ptr2 = 464, table_ptr2
+ nop.i 999 ;;
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fma.s1 Poly1 = P1_3, rsq, P1_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe Q1_7 = [table_ptr2], -16
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe Q1_6 = [table_ptr2], -16
+(p11) fma.s1 Poly2 = P1_9, rsq, P1_8
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfe Q1_5 = [table_ptr2], -16 ;;
+(p12) ldfe Q1_4 = [table_ptr2], -16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p12) ldfe Q1_3 = [table_ptr2], -16
+//
+// N even: Poly2 = P1_8 + P1_9 * rsq
+// N odd: poly2 = Q1_6 + Q1_7 * rsq
+//
+(p11) fma.s1 Poly1 = Poly1, rsq, P1_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p12) ldfe Q1_2 = [table_ptr2], -16
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p12) ldfe Q1_1 = [table_ptr2], -16
+(p11) fma.s1 Poly2 = Poly2, rsq, P1_7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: CORR = rsq + 1
+// N even: r_to_the_8 = rsq * rsq
+//
+(p11) fmpy.s1 Poly1 = Poly1, rsq
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = Q1_7, rsq, Q1_6
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fma.s1 Poly2 = Poly2, rsq, P1_6
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fma.s1 Poly2= Poly2, rsq, P1_5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: r_to_the_8 = r_to_the_8 * r_to_the_8
+// N odd: poly1 = S_hi * r + 1.0 64 bits partial
+//
+(p11) fma.s1 Poly2 = Poly2, rsq, P1_4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = CORR + Poly * r
+// N odd: P = Q1_1 + poly2 * rsq
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Poly2 = P1_4 + Poly2 * rsq
+// N odd: poly2 = Q1_2 + poly2 * rsq
+//
+(p11) fma.s1 Poly = Poly2, r_to_the_8, Poly1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Poly = Poly1 + Poly2 * r_to_the_8
+// N odd: S_hi = S_hi * poly1 + S_hi 64 bits
+//
+(p11) fma.s1 Result = Poly, r, CORR
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = r + Result (User supplied rounding mode)
+// N odd: poly1 = S_hi * c + poly1
+//
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 P = poly2, rsq, Q1_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0
+//
+(p11) fadd.s0 Result = Result, r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = S_hi * poly1
+//
+(p12) fma.s1 S_lo = Q1_1, c, S_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: Result = Result + S_hi (user supplied rounding mode)
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = Q1_1 * c + S_lo
+//
+(p12) fma.s1 Result = P, r, S_lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// N odd: Result = S_lo + r * P
+//
+(p12) fadd.s0 Result = Result, S_hi
+(p0) br.ret.sptk b0 ;;
+}
+
+
+TAN_NORMAL_R:
+
+{ .mfi
+(p0) getf.sig sig_r = r
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+//
+// r and c have been computed.
+// Make sure ftz mode is set - should be automatic when using wre
+//
+//
+// Get [i_1] - lsb of N_fix_gr alone.
+//
+(p0) fmerge.s Pos_r = f1, r
+(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s sgn_r = r, f1
+(p0) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
+}
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+(p0) extr.u lookup = sig_r, 58, 5
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl Create_B = 0x8200000000000000 ;;
+}
+
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
+ nop.f 999
+(p0) dep Create_B = lookup, Create_B, 58, 5
+}
+;;
+
+//
+// Get [i_1] - lsb of N_fix_gr alone.
+// Pos_r = abs (r)
+//
+
+
+{ .mmi
+ ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+ nop.m 999
+(p0) setf.sig B = Create_B
+//
+// Set table_ptr1 and table_ptr2 to base address of
+// constant table.
+//
+(p0) add table_ptr1 = 480, table_ptr1 ;;
+}
+
+{ .mmb
+ nop.m 999
+//
+// Is i_1 or i_0 == 0 ?
+// Create the constant 1 00000 1000000000000000000000...
+//
+(p0) ldfe P2_1 = [table_ptr1], 16
+ nop.b 999
+}
+
+{ .mmi
+ nop.m 999 ;;
+(p0) getf.exp exp_r = Pos_r
+ nop.i 999
+}
+//
+// Get r's exponent
+// Get r's significand
+//
+
+{ .mmi
+(p0) ldfe P2_2 = [table_ptr1], 16 ;;
+//
+// Get the 5 bits or r for the lookup. 1.xxxxx ....
+// from sig_r.
+// Grab lsb of exp of B
+//
+(p0) ldfe P2_3 = [table_ptr1], 16
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) andcm table_offset = 0x0001, exp_r ;;
+(p0) shl table_offset = table_offset, 9 ;;
+}
+
+{ .mii
+ nop.m 999
+//
+// Deposit 0 00000 1000000000000000000000... on
+// 1 xxxxx yyyyyyyyyyyyyyyyyyyyyy...,
+// getting rid of the ys.
+// Is B = 2** -2 or B= 2** -1? If 2**-1, then
+// we want an offset of 512 for table addressing.
+//
+(p0) shladd table_offset = lookup, 4, table_offset ;;
+//
+// B = ........ 1xxxxx 1000000000000000000...
+//
+(p0) add table_ptr1 = table_ptr1, table_offset ;;
+}
+
+{ .mmb
+ nop.m 999
+//
+// B = ........ 1xxxxx 1000000000000000000...
+// Convert B so it has the same exponent as Pos_r
+//
+(p0) ldfd T_hi = [table_ptr1], 8
+ nop.b 999 ;;
+}
+
+//
+// x = |r| - B
+// Load T_hi.
+// Load C_hi.
+//
+
+{ .mmf
+(p0) addl table_ptr2 = @ltoff(TAN_BASE_CONSTANTS), gp
+(p0) ldfs T_lo = [table_ptr1]
+(p0) fmerge.se B = Pos_r, B
+}
+;;
+
+{ .mmi
+ ld8 table_ptr2 = [table_ptr2]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mii
+(p0) add table_ptr2 = 1360, table_ptr2
+ nop.i 999 ;;
+(p0) add table_ptr2 = table_ptr2, table_offset ;;
+}
+
+{ .mfi
+(p0) ldfd C_hi = [table_ptr2], 8
+(p0) fsub.s1 x = Pos_r, B
+ nop.i 999 ;;
+}
+
+{ .mii
+(p0) ldfs C_lo = [table_ptr2],255
+ nop.i 999 ;;
+//
+// xsq = x * x
+// N even: Tx = T_hi * x
+// Load T_lo.
+// Load C_lo - increment pointer to get SC_inv
+// - can't get all the way, do an add later.
+//
+(p0) add table_ptr2 = 569, table_ptr2 ;;
+}
+//
+// N even: Tx1 = Tx + 1
+// N odd: Cx1 = 1 - Cx
+//
+
+{ .mfi
+(p0) ldfe SC_inv = [table_ptr2], 0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 xsq = x, x
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 Tx = T_hi, x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fmpy.s1 Cx = C_hi, x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: Cx = C_hi * x
+//
+(p0) fma.s1 P = P2_3, xsq, P2_2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: P = P2_3 + P2_2 * xsq
+//
+(p11) fadd.s1 Tx1 = Tx, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: D = C_hi - tanx
+// N odd: D = T_hi + tanx
+//
+(p11) fmpy.s1 CORR = SC_inv, T_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 Sx = SC_inv, x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fmpy.s1 CORR = SC_inv, C_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 V_hi = f1, Cx
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 P = P, xsq, P2_1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: P = P2_1 + P * xsq
+//
+(p11) fma.s1 V_hi = Tx, Tx1, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Result = sgn_r * tail + T_hi (user rounding mode for C1)
+// N odd: Result = sgn_r * tail + C_hi (user rounding mode for C1)
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 CORR = CORR, c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_hi = Cx,V_hi,f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: V_hi = Tx * Tx1 + 1
+// N odd: Cx1 = 1 - Cx * Cx1
+//
+(p0) fmpy.s1 P = P, xsq
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: P = P * xsq
+//
+(p11) fmpy.s1 V_hi = V_hi, T_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = P * tail + V_lo
+//
+(p11) fmpy.s1 T_hi = sgn_r, T_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 CORR = CORR, sgn_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fmpy.s1 V_hi = V_hi,C_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: V_hi = T_hi * V_hi
+// N odd: V_hi = C_hi * V_hi
+//
+(p0) fma.s1 tanx = P, x, x
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fnmpy.s1 C_hi = sgn_r, C_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: V_lo = 1 - V_hi + C_hi
+// N odd: V_lo = 1 - V_hi + T_hi
+//
+(p11) fadd.s1 CORR = CORR, T_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 CORR = CORR, C_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tanx = x + x * P
+// N even and odd: Sx = SC_inv * x
+//
+(p11) fsub.s1 D = C_hi, tanx
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fadd.s1 D = T_hi, tanx
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N odd: CORR = SC_inv * C_hi
+// N even: CORR = SC_inv * T_hi
+//
+(p0) fnma.s1 D = V_hi, D, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: D = 1 - V_hi * D
+// N even and odd: CORR = CORR * c
+//
+(p0) fma.s1 V_hi = V_hi, D, V_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: V_hi = V_hi + V_hi * D
+// N even and odd: CORR = sgn_r * CORR
+//
+(p11) fnma.s1 V_lo = V_hi, C_hi, f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_lo = V_hi, T_hi, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: CORR = CORR + T_lo
+// N odd: CORR = CORR - C_lo
+//
+(p11) fma.s1 V_lo = tanx, V_hi, V_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_lo = tanx, V_hi, V_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: V_lo = V_lo + V_hi * tanx
+// N odd: V_lo = V_lo - V_hi * tanx
+//
+(p11) fnma.s1 V_lo = C_lo, V_hi, V_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_lo = T_lo, V_hi, V_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: V_lo = V_lo - V_hi * C_lo
+// N odd: V_lo = V_lo - V_hi * T_lo
+//
+(p0) fmpy.s1 V_lo = V_hi, V_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: V_lo = V_lo * V_hi
+//
+(p0) fadd.s1 tail = V_hi, V_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = V_hi + V_lo
+//
+(p0) fma.s1 tail = tail, P, V_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: T_hi = sgn_r * T_hi
+// N odd : C_hi = -sgn_r * C_hi
+//
+(p0) fma.s1 tail = tail, Sx, CORR
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = Sx * tail + CORR
+//
+(p0) fma.s1 tail = V_hi, Sx, tail
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = Sx * V_hi + tail
+//
+(p11) fma.s0 Result = sgn_r, tail, T_hi
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p12) fma.s0 Result = sgn_r, tail, C_hi
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp __libm_tan
+ASM_SIZE_DIRECTIVE(__libm_tan)
+
+
+
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+//
+// Special code to handle the very large argument case.
+// Calls int __libm_pi_by_2_reduce(&x,&r) for |arguments| >= 2**63;
+// the integer left in r8 by the call is copied to N_fix_gr.
+// (Arg or x) is in f8
+// r and c are zeroed on the stack with stfe and reloaded with ldfe after the call
+
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | r50 ->| <- r50 f0 ->| r50 -> | -> c
+// | | | |
+// sp-32 -> | <- r50 f0 ->| f0 ->| <- r50 r49 -> | -> r
+// | | | |
+// | r49 ->| <- r49 Arg ->| <- r49 | -> x
+// | | | |
+// sp -64 ->| sp -64 ->| sp -64 ->| |
+//
+// save pfs save b0 restore gp
+// save gp restore b0
+// restore pfs
+
+
+
+.proc __libm_callout
+__libm_callout:
+TAN_ARG_TOO_LARGE:
+.prologue
+// (1) GR_Parameter_r = incoming sp - 32; save ar.pfs and gp; allocate a 64-byte frame
+{ .mfi
+ add GR_Parameter_r =-32,sp // Parameter: r address (incoming sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+// (2) Zero the r slot, form the x address, save b0
+{ .mmi
+ stfe [GR_Parameter_r ] = f0,16 // Clear Parameter r on stack, post-increment to the c slot
+ add GR_Parameter_X = 16,sp // Parameter x address (new sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+// (3) Zero the c slot, store x, call the reduction routine
+.body
+{ .mib
+ stfe [GR_Parameter_r ] = f0,-16 // Clear Parameter c on stack, step back to the r slot
+ nop.i 0
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_X] = Arg // Store Parameter x on stack
+ nop.i 0
+(p0) br.call.sptk b0=__libm_pi_by_2_reduce# // Call the large-argument reducer
+}
+;;
+
+
+// (4) Restore gp, capture the call's integer result, reload x, r and c
+{ .mmi
+ mov gp = GR_SAVE_GP // Restore gp
+(p0) mov N_fix_gr = r8 // Integer returned by the call
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p0) ldfe Arg =[GR_Parameter_X],16 // Reload x from its stack slot
+(p0) ldfs TWO_TO_NEG2 = [table_ptr2],4 // Next single from table_ptr2 (presumably 2**(-2); table not visible here)
+ nop.i 999
+}
+;;
+
+
+{ .mmb
+(p0) ldfe r =[GR_Parameter_r ],16 // Load r, post-increment to the c slot
+(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr2],4 // Next single from table_ptr2 (presumably -2**(-2))
+ nop.b 999 ;;
+}
+
+{ .mfi
+(p0) ldfe c =[GR_Parameter_r ] // Load c
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Is |r| < 2**(-2)?  p6 = (r < TWO_TO_NEG2), then p6 = p6 and (r > NEGTWO_TO_NEG2)
+//
+(p0) fcmp.lt.unc.s1 p6, p0 = r, TWO_TO_NEG2
+ mov b0 = GR_SAVE_B0 // Restore return address
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.gt.unc.s1 p6, p0 = r, NEGTWO_TO_NEG2
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+}
+;;
+
+{ .mbb
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+(p6) br.cond.spnt TAN_SMALL_R // Both comparisons held: small-r path
+(p0) br.cond.sptk TAN_NORMAL_R // Otherwise: normal-r path
+}
+;;
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+
+
+.proc __libm_TAN_SPECIAL
+__libm_TAN_SPECIAL:
+
+//
+// Exit path for NaNs, Unsupporteds, Infs, or +/- zero (presumably; the dispatch is not in this part of the file): returns Arg * 0.
+// Invalid raised for Infs and SNaNs.
+//
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.s0 Arg = Arg, f0 // Arg = Arg * 0, computed in status field s0
+(p0) br.ret.sptk b0 // Return to the caller
+}
+.endp __libm_TAN_SPECIAL
+ASM_SIZE_DIRECTIVE(__libm_TAN_SPECIAL)
+
+
+.type __libm_pi_by_2_reduce#,@function
+.global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_atan.S b/sysdeps/ia64/fpu/s_atan.S
new file mode 100644
index 0000000..e3a5c85
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_atan.S
@@ -0,0 +1,953 @@
+.file "atan.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/13/00: Improved speed
+// 4/19/00: Removed the qualifying predicate from the fmerge.s that
+// takes the absolute value.
+// 6/16/00: Reassigned FP registers to eliminate stalls on loads
+// 8/30/00: Saved 5 cycles in main path by rearranging large argument logic
+// and delaying use of result of fcmp in load by 1 group
+//
+// API
+//==============================================================
+// double atan( double x);
+//
+// Overview of operation
+//==============================================================
+// atan(x) = sign(X)pi/2 - atan(1/x)
+//
+// We have two paths: |x| > 1 and |x| <= 1
+//
+// |x| > 1
+// ==========================================
+//
+// c = frcpa(x) which is approximately 1/x
+//
+// xc = 1- B
+// B = 1-xc
+//
+// Approximate 1/(1-B)^k by a polynomial in B, poly(B)
+// k is 45.
+//
+// poly(B) = 1 + r1 B + r2 B^2 + ...+ r10 B^10
+//
+// c^k = (1-B)^k/x^k
+// c^k/(1-B)^k = 1/x^k
+// c^k poly(B) = 1/x^k
+
+// poly(x) = series(atan(1/x)) = 1/x - 1/3x^3 + 1/5x^5 - 1/7x^7 .... + 1/45 x^45
+// = 1/x^45 ( x^44 - x^42/3 + x^40/5 - x^38/7 ... +1)
+// = 1/x^45 ( y^22 - y^21/3 + y^20/5 - y^19/7 ... +1)
+//
+// = c^45 poly(B) poly(x)
+// = c^45 r(B) q(y)
+
+// q(y) = q0 + q1 y + q2 y^2 + ... + q22 y^22
+// where q22 is 1.0
+
+// atan(x) = sign(X)pi/2 - c^45 r(B) q(y)
+
+// |x| <= 1
+// ==========================================
+// poly(x) = series(atan(x)) = x - x^3/3 + x^5/5 + .....
+// poly(x) = series(atan(x)) = x + x^3(- 1/3 + x^2/5 + ..... + x^44/47)
+// poly(x) = series(atan(x)) = x + x^3(p0 + x^2/5 + ..... + x^44/47)
+// poly(x) = series(atan(x)) = x + x^3(p0 + y/5 + ..... + y^22/47)
+
+// where p0 is about -1/3.
+
+// atan(x) = poly(x)
+
+#include "libm_support.h"
+
+// Special Values
+//==============================================================
+// atan(QNAN) = QNAN
+// atan(SNAN) = quieted SNAN
+// atan(+-inf) = +- pi/2
+// atan(+-0) = +-0
+
+
+
+// Registers used
+//==============================================================
+
+// predicate registers used:
+// p6 -> p11
+
+// floating-point registers used:
+// f8, f10 -> f14, f32 -> f127
+
+// general registers used
+// r14 -> r20
+
+// Assembly macros
+//==============================================================
+atan_Pi_by_2 = f32
+atan_S_PI = f33
+atan_ABS_f8 = f34
+
+atan_R0 = f35
+atan_R1 = f36
+atan_R2 = f37
+atan_R3 = f38
+atan_R4 = f39
+atan_R5 = f40
+atan_R6 = f41
+atan_R7 = f42
+atan_R8 = f43
+atan_R9 = f44
+atan_R10 = f45
+
+atan_Q0 = f46
+
+atan_Q1 = f47
+atan_Q2 = f48
+atan_Q3 = f49
+atan_Q4 = f50
+atan_Q5 = f51
+atan_Q6 = f52
+atan_Q7 = f53
+atan_Q8 = f54
+atan_Q9 = f55
+atan_Q10 = f56
+
+atan_Q11 = f57
+atan_Q12 = f58
+atan_Q13 = f59
+atan_Q14 = f60
+atan_Q15 = f61
+atan_Q16 = f62
+atan_Q17 = f63
+atan_Q18 = f64
+atan_Q19 = f65
+atan_Q20 = f66
+atan_Q21 = f67
+atan_Q22 = f68
+
+// P and Q constants are mutually exclusive
+// so they can share macro definitions
+atan_P0 = f46
+
+atan_P1 = f47
+atan_P2 = f48
+atan_P3 = f49
+atan_P4 = f10
+atan_P5 = f11
+atan_P6 = f12
+atan_P7 = f13
+atan_P10 = f103
+
+atan_P11 = f114
+atan_P12 = f58
+atan_P13 = f59
+atan_P14 = f60
+atan_P15 = f61
+atan_P16 = f62
+atan_P17 = f63
+atan_P18 = f64
+atan_P19 = f65
+atan_P20 = f14
+atan_P21 = f99
+atan_P22 = f68
+// end of P constant macros
+
+atan_C = f69
+atan_Y = f70
+atan_B = f71
+// atan_Z is assigned once, further down in this list (f119).  A duplicate
+// assignment to f72 used to sit here; it was overridden by the later one
+// before any instruction referenced atan_Z, so it has been dropped.
+atan_V11 = f73
+atan_V12 = f74
+
+atan_V7 = f75
+atan_V8 = f76
+
+atan_W13 = f77
+atan_W11 = f78
+
+atan_V3 = f79
+atan_V4 = f80
+
+atan_G11 = f81
+atan_G12 = f82
+atan_G7 = f83
+atan_G8 = f84
+
+atan_Z1 = f85
+atan_W7 = f86
+
+atan_G3 = f87
+atan_W8 = f88
+atan_V9 = f89
+atan_V10 = f90
+
+atan_G10 = f91
+atan_W3 = f92
+atan_G4 = f93
+atan_G9 = f94
+
+atan_G6 = f95
+atan_W4 = f96
+atan_Z2 = f97
+atan_V6 = f98
+
+atan_V2 = f99
+atan_W6 = f100
+atan_W10 = f101
+atan_Y3 = f102
+
+atan_G2 = f103
+
+atan_Y8 = f104
+
+atan_G5 = f105
+atan_Z3 = f106
+atan_Z4 = f107
+atan_W2 = f108
+atan_V5 = f109
+
+atan_W5 = f110
+atan_G1 = f111
+atan_Y11 = f112
+
+atan_Z5 = f113
+atan_Z6 = f114
+atan_V1 = f115
+atan_W1 = f116
+
+atan_Z7 = f117
+atan_Q = f118
+atan_Z = f119
+atan_abs_f8 = f120
+
+atan_V13 = f121
+atan_Xcub = f122
+atan_Y12 = f123
+atan_P = f124
+
+atan_NORM_f8 = f125
+
+atan_P8 = f126
+atan_P9 = f127
+
+
+
+
+atan_GR_AD_R = r14
+atan_GR_AD_Q = r15
+atan_GR_AD_P = r16
+atan_GR_10172 = r17
+atan_GR_exp_f8 = r18
+atan_GR_signexp_f8 = r19
+atan_GR_exp_mask = r20
+
+
+
+
+/////////////////////////////////////////////////////////////
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+double_atan_constants_R:
+ASM_TYPE_DIRECTIVE(double_atan_constants_R,@object)
+ data8 0xB36B46B9C5443CED, 0x0000401C //R8
+ data8 0x842633E0D126261F, 0x0000401F //R9
+ data8 0xBE04FFFFFFFF46E0, 0x00004010 //R4
+ data8 0xE8C62000244D66E2, 0x00004013 //R5
+ data8 0xF2790C001E3789B3, 0x00004016 //R6
+ data8 0xDCD2CCF97D7C764F, 0x00004019 //R7
+ data8 0xB40000000000000B, 0x00004004 //R1
+ data8 0xB265F3D38F5EE28F, 0x00004021 //R10
+ data8 0x8160000000000001, 0x00004009 //R2
+ data8 0xFD5BFFFFFFFE55CD, 0x0000400C //R3
+ data8 0xC90FDAA22168C235, 0x00003FFF // pi/2
+ASM_SIZE_DIRECTIVE(double_atan_constants_R)
+
+double_atan_constants_Q:
+ASM_TYPE_DIRECTIVE(double_atan_constants_Q,@object)
+ data8 0xEBD602FA7761BC33, 0x00003FF9 //Q8
+ data8 0x8CB1CABD6A91913C, 0x0000BFFA //Q9
+ data8 0x84C665C37D623CD2, 0x00003FF7 //Q4
+ data8 0x8DE0D1673DAEA9BC, 0x0000BFF8 //Q5
+ data8 0xF658ADBE2C6E6FCC, 0x00003FF8 //Q6
+
+ data8 0xB56307BE1DD3FFB6, 0x0000BFF9 //Q7
+ data8 0xAAAAAAAAAAAA8000, 0x0000BFFD //Q21
+ data8 0x8000000000000000, 0x00003FFF //Q22
+ data8 0x924924923A9D710C, 0x0000BFFC //Q19
+ data8 0xCCCCCCCCCC9380E7, 0x00003FFC //Q20
+
+ data8 0xA644DC250EFA2800, 0x00003FED //Q0
+ data8 0x83DEAE24EEBF5E44, 0x0000BFF1 //Q1
+ data8 0xC758CCC64793D4EC, 0x00003FF3 //Q2
+ data8 0xBFDC0B54E7C89DCE, 0x0000BFF5 //Q3
+ data8 0x888855199D1290AF, 0x0000BFFB //Q15
+
+ data8 0x9D89D3BE514B0178, 0x00003FFB //Q16
+ data8 0xBA2E8B4DEC70282A, 0x0000BFFB //Q17
+ data8 0xE38E38DF9E9FC83B, 0x00003FFB //Q18
+ data8 0x9F8781CC990029D9, 0x00003FFA //Q10
+ data8 0xB0B39472DEBA3C79, 0x0000BFFA //Q11
+
+ data8 0xC2AFAEF8C85B0BC6, 0x00003FFA //Q12
+ data8 0xD780E539797525DD, 0x0000BFFA //Q13
+ data8 0xF0EDC449AC786DF9, 0x00003FFA //Q14
+ASM_SIZE_DIRECTIVE(double_atan_constants_Q)
+
+
+
+double_atan_constants_P:
+ASM_TYPE_DIRECTIVE(double_atan_constants_P,@object)
+ data8 0xB1899EC590CDB8DF, 0x0000BFFA //P10
+ data8 0xA1E79850A67D59B0, 0x00003FFA //P11
+ data8 0x911D8B30C2A96E6D, 0x0000BFF3 //P20
+ data8 0xB87233C68A640706, 0x00003FF0 //P21
+ data8 0xD78E4B82F3C29D7A, 0x0000BFFA //P8
+
+ data8 0xC2EBE37AF932C14F, 0x00003FFA //P9
+ data8 0xBA2E8B94AA104DD6, 0x0000BFFB //P4
+ data8 0x9D89D7A640B71D38, 0x00003FFB //P5
+ data8 0x88887CA2CE9B2A40, 0x0000BFFB //P6
+ data8 0xF0F017D57A919C1E, 0x00003FFA //P7
+
+ data8 0xD0D635F230C80E06, 0x0000BFF8 //P16
+ data8 0xE847BECA7209B479, 0x00003FF7 //P17
+ data8 0xD14C6A2AAE0D5B07, 0x0000BFF6 //P18
+ data8 0x915F612A5C469117, 0x00003FF5 //P19
+ data8 0x921EDE5FD0DBBBE2, 0x0000BFFA //P12
+
+ data8 0xFFD303C2C8535445, 0x00003FF9 //P13
+ data8 0xD30DF50E295386F7, 0x0000BFF9 //P14
+ data8 0x9E81F2B1BBD210A8, 0x00003FF9 //P15
+ data8 0xAAAAAAAAAAAAA800, 0x0000BFFD //P0
+ data8 0xCCCCCCCCCCC7D476, 0x00003FFC //P1
+
+ data8 0x9249249247838066, 0x0000BFFC //P2
+ data8 0xE38E38E302290D68, 0x00003FFB //P3
+ data8 0xDF7F0A816F7E5025, 0x0000BFEC //P22
+ASM_SIZE_DIRECTIVE(double_atan_constants_P)
+
+
+.align 32
+.global atan#
+
+////////////////////////////////////////////////////////
+
+
+
+.section .text
+.proc atan#
+.align 32
+
+atan:
+
+{ .mmf
+(p0) addl atan_GR_AD_P = @ltoff(double_atan_constants_P), gp
+(p0) addl atan_GR_AD_Q = @ltoff(double_atan_constants_Q), gp
+(p0) fmerge.s atan_ABS_f8 = f0,f8
+}
+;;
+
+{ .mmf
+ ld8 atan_GR_AD_P = [atan_GR_AD_P]
+ ld8 atan_GR_AD_Q = [atan_GR_AD_Q]
+(p0) frcpa.s1 atan_C,p8 = f1,f8
+}
+;;
+
+{ .mmf
+(p0) addl atan_GR_AD_R = @ltoff(double_atan_constants_R), gp
+(p0) addl atan_GR_exp_mask = 0x1ffff, r0
+(p0) fma.s1 atan_Y = f8,f8,f0
+}
+;;
+
+// This fnorm takes faults or sets fault flags
+{ .mmf
+(p0) mov atan_GR_10172 = 0x10172
+ ld8 atan_GR_AD_R = [atan_GR_AD_R]
+(p0) fnorm atan_NORM_f8 = f8
+}
+;;
+
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 0 0 0 1 11
+// c 7
+
+// p9 set if we have a NAN or +-0
+
+{ .mmf
+(p0) ldfe atan_Q8 = [atan_GR_AD_Q],16
+(p0) ldfe atan_P10 = [atan_GR_AD_P],16
+(p0) fclass.m.unc p9, p0 = f8, 0xc7
+}
+;;
+
+
+{ .mmi
+(p0) ldfe atan_Q9 = [atan_GR_AD_Q],16
+(p0) ldfe atan_P11 = [atan_GR_AD_P],16
+ nop.i 999
+}
+;;
+
+
+{ .mmf
+(p0) ldfe atan_Q4 = [atan_GR_AD_Q],16
+(p0) ldfe atan_P20 = [atan_GR_AD_P],16
+(p9) fma.d.s0 f8 = f8,f1,f0
+;;
+}
+
+// Exit if we have a NAN or +-0
+{ .mmb
+(p0) ldfe atan_Q5 = [atan_GR_AD_Q],16
+(p0) ldfe atan_P21 = [atan_GR_AD_P],16
+(p9) br.ret.spnt b0
+;;
+}
+
+
+// p6 is TRUE if |x| <= 1
+// p7 is TRUE if |x| > 1
+{ .mmf
+(p0) ldfe atan_Q6 = [atan_GR_AD_Q],16
+(p0) ldfe atan_P8 = [atan_GR_AD_P],16
+(p0) fcmp.le.unc p6,p7 = atan_ABS_f8, f1
+;;
+}
+
+
+{ .mfi
+(p0) ldfe atan_Q7 = [atan_GR_AD_Q],16
+(p0) fma.s1 atan_Z = atan_C, atan_C, f0
+ nop.i 999
+}
+{ .mfi
+(p0) ldfe atan_P9 = [atan_GR_AD_P],16
+(p0) fnma.s1 atan_B = atan_C,f8, f1
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p0) ldfe atan_Q21 = [atan_GR_AD_Q],16
+(p0) fma.s1 atan_V12 = atan_Y, atan_Y, f0
+ nop.i 999
+}
+{ .mfi
+(p0) ldfe atan_P4 = [atan_GR_AD_P],16
+(p0) fma.s1 atan_Xcub = f8, atan_Y , f0
+ nop.i 999
+;;
+}
+
+
+{ .mmi
+(p7) ldfe atan_Q22 = [atan_GR_AD_Q],16
+(p6) ldfe atan_P5 = [atan_GR_AD_P],16
+(p6) cmp.eq.unc p8,p0 = r0,r0
+;;
+}
+
+
+{ .mmi
+(p7) ldfe atan_Q19 = [atan_GR_AD_Q],16
+(p6) ldfe atan_P6 = [atan_GR_AD_P],16
+(p7) cmp.eq.unc p9,p0 = r0,r0
+;;
+}
+
+
+{ .mmi
+(p7) ldfe atan_Q20 = [atan_GR_AD_Q],16
+(p6) ldfe atan_P7 = [atan_GR_AD_P],16
+ nop.i 999
+;;
+}
+
+{ .mfi
+(p7) ldfe atan_Q0 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_V13 = atan_Y, atan_P11, atan_P10
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P16 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_V11 = atan_Y, atan_Q9, atan_Q8
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_Q1 = [atan_GR_AD_Q],16
+(p7) fma.s1 atan_G12 = atan_B, atan_B, f0
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P17 = [atan_GR_AD_P],16
+(p0) fma.s1 atan_V9 = atan_V12, atan_V12, f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_Q2 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_W11 = atan_Y, atan_P21, atan_P20
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P18 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_V7 = atan_Y, atan_Q5, atan_Q4
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p7) ldfe atan_Q3 = [atan_GR_AD_Q],16
+(p7) fma.s1 atan_Z1 = atan_Z, atan_Z, f0
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P19 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_Y3 = atan_Y , atan_V12, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p7) ldfe atan_R8 = [atan_GR_AD_R],16
+(p6) fma.s1 atan_V11 = atan_Y, atan_P9, atan_P8
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P12 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_V8 = atan_Y, atan_Q7, atan_Q6
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p7) ldfe atan_R9 = [atan_GR_AD_R],16
+(p6) ldfe atan_P13 = [atan_GR_AD_P],16
+ nop.i 999
+;;
+}
+
+{ .mfi
+(p7) ldfe atan_R4 = [atan_GR_AD_R],16
+(p6) fma.s1 atan_V7 = atan_Y, atan_P5, atan_P4
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P14 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_W13 = atan_Y, atan_Q22, atan_Q21
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_R5 = [atan_GR_AD_R],16
+(p6) fma.s1 atan_Y12 = atan_V9 , atan_V9 , f0
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P15 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_Y8 = atan_V9 , atan_V9 , f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_R6 = [atan_GR_AD_R],16
+(p6) fma.s1 atan_V8 = atan_Y, atan_P7, atan_P6
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P0 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_W11 = atan_Y, atan_Q20, atan_Q19
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_R7 = [atan_GR_AD_R],16
+(p7) fma.s1 atan_Z2 = atan_Z1 , atan_Z1, f0
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P1 = [atan_GR_AD_P],16
+(p6) fma.s1 atan_V10 = atan_V12, atan_V13, atan_V11
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p7) ldfe atan_Q15 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_W7 = atan_Y, atan_P17, atan_P16
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P2 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_V3 = atan_Y, atan_Q1 , atan_Q0
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p7) ldfe atan_Q16 = [atan_GR_AD_Q],16
+(p7) fma.s1 atan_G9 = atan_G12, atan_G12, f0
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P3 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_R1 = [atan_GR_AD_R],16
+(p6) fma.s1 atan_W8 = atan_Y, atan_P19, atan_P18
+ nop.i 999
+}
+{ .mfi
+(p6) ldfe atan_P22 = [atan_GR_AD_P],16
+(p7) fma.s1 atan_V4 = atan_Y, atan_Q3 , atan_Q2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ getf.exp atan_GR_signexp_f8 = atan_NORM_f8
+(p7) fma.s1 atan_Y11 = atan_Y3 , atan_Y8 , f0
+ nop.i 999
+}
+{ .mfi
+(p7) ldfe atan_Q17 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_V6 = atan_V12, atan_V8, atan_V7
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_Q18 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_W3 = atan_Y, atan_P13, atan_P12
+ nop.i 999
+}
+{ .mfi
+(p7) ldfe atan_R10 = [atan_GR_AD_R],16
+(p7) fma.s1 atan_G11 = atan_B, atan_R9 , atan_R8
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_Q10 = [atan_GR_AD_Q],16
+(p7) fma.s1 atan_Z3 = atan_Z1 , atan_Z2 , f0
+(p0) and atan_GR_exp_f8 = atan_GR_signexp_f8,atan_GR_exp_mask
+}
+{ .mfi
+(p7) ldfe atan_R2 = [atan_GR_AD_R],16
+(p7) fma.s1 atan_Z4 = atan_Z2 , atan_Z2 , f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+(p7) ldfe atan_Q11 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_W4 = atan_Y, atan_P15, atan_P14
+ nop.i 999
+}
+{ .mfi
+(p7) ldfe atan_R3 = [atan_GR_AD_R],16
+(p7) fma.s1 atan_G7 = atan_B, atan_R5 , atan_R4
+(p0) cmp.le.unc p11,p0 = atan_GR_10172,atan_GR_exp_f8
+;;
+}
+
+
+{ .mmf
+(p9) ldfe atan_Q12 = [atan_GR_AD_Q],16
+(p0) ldfe atan_S_PI = [atan_GR_AD_R],16
+(p8) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
+;;
+}
+
+
+
+{ .mfi
+(p9) ldfe atan_Q13 = [atan_GR_AD_Q],16
+(p8) fma.s1 atan_V3 = atan_Y, atan_P1 , atan_P0
+(p11) cmp.ne.and p6,p7 = r0,r0
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
+ nop.i 999 ;;
+}
+
+
+.pred.rel "mutex",p6,p7,p11
+{ .mfi
+(p7) ldfe atan_Q14 = [atan_GR_AD_Q],16
+(p6) fma.s1 atan_Y12 = atan_V9 , atan_Y12, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G8 = atan_B, atan_R7 , atan_R6
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_V4 = atan_Y, atan_P3 , atan_P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W7 = atan_Y, atan_Q16, atan_Q15
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_W10 = atan_V12, atan_P22, atan_W11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G3 = atan_B, atan_R1 , f1
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_V2 = atan_V12, atan_V4 , atan_V3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W8 = atan_Y, atan_Q18, atan_Q17
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G10 = atan_G12, atan_R10, atan_G11
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_V10 = atan_V12, atan_Q10, atan_V11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G6 = atan_G12, atan_G8 , atan_G7
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_V2 = atan_V12, atan_V4, atan_V3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G4 = atan_B , atan_R3 , atan_R2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W3 = atan_Y , atan_Q12, atan_Q11
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_Z5 = atan_Z3 , atan_Z4 , f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W10 = atan_V12, atan_W13, atan_W11
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W4 = atan_Y , atan_Q14, atan_Q13
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W6 = atan_V12, atan_W8, atan_W7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_V5 = atan_V9 , atan_V10, atan_V6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G5 = atan_G9 , atan_G10, atan_G6
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G2 = atan_G12, atan_G4 , atan_G3
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_Z6 = atan_Z4 , atan_C , f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.s atan_S_PI = f8, atan_S_PI
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W5 = atan_V9 , atan_W10, atan_W6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W2 = atan_V12, atan_W4 , atan_W3
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_G1 = atan_G9 , atan_G5 , atan_G2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_V1 = atan_V9 , atan_V5 , atan_V2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan_P = atan_Y12, atan_W1 , atan_V1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_Z7 = atan_Z5 , atan_Z6 , f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_W1 = atan_V9 , atan_W5 , atan_W2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p11) fma.d.s0 f8 = atan_S_PI,f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_Z = atan_G1 , atan_Z7 , f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan_Q = atan_Y11, atan_W1 , atan_V1
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fma.d.s0 f8 = atan_P , atan_Xcub , f8
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p7) fnma.d.s0 f8 = atan_Z , atan_Q , atan_S_PI
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp atan
+ASM_SIZE_DIRECTIVE(atan)
diff --git a/sysdeps/ia64/fpu/s_atanf.S b/sysdeps/ia64/fpu/s_atanf.S
new file mode 100644
index 0000000..8edd5d4
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_atanf.S
@@ -0,0 +1,543 @@
+.file "atanf.s"
+
+// THIS IS NOT OPTIMIZED AND NOT OFFICIAL
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+
+// History
+//==============================================================
+// ?/??/00 Initial revision
+// 8/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+
+#include "libm_support.h"
+
+//
+// Assembly macros
+//==============================================================
+
+// integer registers used
+EXP_Addr1 = r33
+EXP_Addr2 = r34
+
+// floating point registers used
+atanf_coeff_R4 = f32
+atanf_coeff_R5 = f33
+atanf_coeff_R1 = f34
+atanf_coeff_R2 = f35
+
+atanf_coeff_R3 = f36
+atanf_coeff_P1 = f37
+atanf_coeff_Q6 = f38
+atanf_coeff_Q7 = f39
+atanf_coeff_Q8 = f40
+
+atanf_coeff_Q9 = f41
+atanf_coeff_Q4 = f42
+atanf_coeff_Q5 = f43
+atanf_coeff_Q2 = f44
+atanf_coeff_Q3 = f45
+
+atanf_coeff_P5 = f46
+atanf_coeff_P6 = f47
+atanf_coeff_Q0 = f48
+atanf_coeff_Q1 = f49
+atanf_coeff_P7 = f50
+
+atanf_coeff_P8 = f51
+atanf_coeff_P3 = f52
+atanf_coeff_P4 = f53
+atanf_coeff_P9 = f54
+atanf_coeff_P10 = f55
+
+atanf_coeff_P2 = f56
+atanf_piby2 = f57
+atanf_z = f58
+atanf_b = f59
+atanf_zsq = f60
+
+atanf_sgn_x = f61
+atanf_sgnx_piby2 = f62
+atanf_abs_x = f63
+atanf_t = f64
+atanf_xcub = f65
+
+atanf_tsq = f66
+atanf_t4 = f67
+atanf_x5 = f68
+atanf_x6 = f69
+atanf_x11 = f70
+
+atanf_poly_p1 = f71
+atanf_poly_p2 = f72
+atanf_poly_p3 = f73
+atanf_poly_p4 = f74
+atanf_poly_p5 = f75
+
+atanf_poly_q1 = f76
+atanf_poly_q2 = f77
+atanf_poly_q3 = f78
+atanf_poly_q4 = f79
+atanf_poly_q5 = f80
+
+// atanf_poly_r1 and atanf_poly_r2 are assigned exactly once, below (f90 and
+// f89).  Earlier duplicate assignments (f81 and f82) were overridden before
+// any use, and the f81 one collided with atanf_poly_q, so they are removed.
+atanf_poly_q = f81
+atanf_poly_r3 = f83
+atanf_bsq = f84
+atanf_z4 = f85
+
+atanf_z5 = f86
+atanf_z8 = f87
+atanf_z13 = f88
+atanf_poly_r2 = f89
+atanf_poly_r1 = f90
+
+atanf_z8_bsq = f91
+atanf_poly_r = f92
+atanf_z21_poly_r = f93
+atanf_answer = f8
+
+
+// predicate registers used
+//atanf_pred_LE1 = p6
+//atanf_pred_GT1 = p7
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+atanf_coeff_1_table:
+ASM_TYPE_DIRECTIVE(atanf_coeff_1_table,@object)
+data8 0x40c4c241be751ff2 // r4
+data8 0x40e9f300c2f3070b // r5
+data8 0x409babffef772075 // r3
+data8 0xbfd5555512191621 // p1
+data8 0x3fc9997e7afbff4e // p2 = q8
+data8 0xbfd5555512191621 // p1 = q9
+data8 0x3f97105b4160f86b // p8 = q2
+data8 0xbfa6e10ba401393f // p7 = q3
+data8 0x3f522e5d33bc9baa // p10 = q0
+data8 0xbf7deaadaa336451 // p9 = q1
+data8 0xbfc2473c5145ee38 // p3
+data8 0x3fbc4f512b1865f5 // p4
+data8 0x3fc9997e7afbff4e // p2
+data8 0x3ff921fb54442d18 // pi/2
+ASM_SIZE_DIRECTIVE(atanf_coeff_1_table)
+
+
+
+atanf_coeff_2_table:
+ASM_TYPE_DIRECTIVE(atanf_coeff_2_table,@object)
+data8 0x4035000000004284 // r1
+data8 0x406cdffff336a59b // r2
+data8 0x3fbc4f512b1865f5 // p4 = q6
+data8 0xbfc2473c5145ee38 // p3 = q7
+data8 0x3fb142a73d7c54e3 // p6 = q4
+data8 0xbfb68eed6a8cfa32 // p5 = q5
+data8 0xbfb68eed6a8cfa32 // p5
+data8 0x3fb142a73d7c54e3 // p6
+data8 0xbfa6e10ba401393f // p7
+data8 0x3f97105b4160f86b // p8
+data8 0xbf7deaadaa336451 // p9
+data8 0x3f522e5d33bc9baa // p10
+ASM_SIZE_DIRECTIVE(atanf_coeff_2_table)
+
+
+
+.global atanf
+
+.text
+.proc atanf
+
+.align 32
+atanf:
+
+
+{ .mfi
+ alloc r32 = ar.pfs,1,2,0,0
+ frcpa.s1 atanf_z,p0 = f1,f8
+ addl EXP_Addr2 = @ltoff(atanf_coeff_2_table),gp
+}
+{ .mfi
+ addl EXP_Addr1 = @ltoff(atanf_coeff_1_table),gp
+ fma.s1 atanf_t = f8,f8,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fmerge.s atanf_sgn_x = f8,f1
+ nop.i 999;;
+}
+
+{ .mfi
+ ld8 EXP_Addr1 = [EXP_Addr1]
+ fmerge.s atanf_abs_x = f1,f8
+ nop.i 999
+}
+{ .mfi
+ ld8 EXP_Addr2 = [EXP_Addr2]
+ nop.f 999
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fclass.m p8,p0 = f8,0x7 // @zero
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.unc.s0 p9,p10 = f8,f1
+ nop.i 999;;
+}
+
+{ .mfi
+ ldfpd atanf_coeff_R4,atanf_coeff_R5 = [EXP_Addr1],16
+ fnma.s1 atanf_b = f8,atanf_z,f1
+ nop.i 999
+}
+{ .mfi
+ ldfpd atanf_coeff_R1,atanf_coeff_R2 = [EXP_Addr2],16
+ fma.s1 atanf_zsq = atanf_z,atanf_z,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd atanf_coeff_R3,atanf_coeff_P1 = [EXP_Addr1],16
+ fma.s1 atanf_xcub = f8,atanf_t,f0
+ nop.i 999
+}
+{ .mfi
+ ldfpd atanf_coeff_Q6,atanf_coeff_Q7 = [EXP_Addr2],16
+ fma.s1 atanf_tsq = atanf_t,atanf_t,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd atanf_coeff_Q8,atanf_coeff_Q9 = [EXP_Addr1],16
+// fcmp.le.s1 atanf_pred_LE1,atanf_pred_GT1 = atanf_abs_x,f1
+ fcmp.le.s1 p6,p7 = atanf_abs_x,f1
+ nop.i 999
+}
+{ .mfi
+ ldfpd atanf_coeff_Q4,atanf_coeff_Q5 = [EXP_Addr2],16
+ nop.f 999
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd atanf_coeff_Q2,atanf_coeff_Q3 = [EXP_Addr1],16
+ fclass.m p8,p0 = f8,0xe7 // @inf|@qnan|@snan|@zero
+ nop.i 999
+}
+{ .mfi
+ ldfpd atanf_coeff_P5,atanf_coeff_P6 = [EXP_Addr2],16
+ nop.f 999
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd atanf_coeff_Q0,atanf_coeff_Q1 = [EXP_Addr1],16
+ nop.f 999
+ nop.i 999
+}
+{ .mfi
+ ldfpd atanf_coeff_P7,atanf_coeff_P8 = [EXP_Addr2],16
+ nop.f 999
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd atanf_coeff_P3,atanf_coeff_P4 = [EXP_Addr1],16
+ fma.s1 atanf_bsq = atanf_b,atanf_b,f0
+ nop.i 999
+}
+{ .mfi
+ ldfpd atanf_coeff_P9,atanf_coeff_P10 = [EXP_Addr2]
+ fma.s1 atanf_z4 = atanf_zsq,atanf_zsq,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd atanf_coeff_P2,atanf_piby2 = [EXP_Addr1]
+ fma.s1 atanf_x6 = atanf_t,atanf_tsq,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_t4 = atanf_tsq,atanf_tsq,f0
+ nop.i 999;;
+}
+
+
+{ .mfb
+ nop.m 999
+ fma.s1 atanf_x5 = atanf_t,atanf_xcub,f0
+(p8) br.cond.spnt L(ATANF_X_INF_NAN_ZERO)
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_r1 = atanf_b,atanf_coeff_R1,f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_r3 = atanf_b,atanf_coeff_R5,atanf_coeff_R4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_r2 = atanf_b,atanf_coeff_R3,atanf_coeff_R2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_z8 = atanf_z4,atanf_z4,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q2 = atanf_t,atanf_coeff_Q5,atanf_coeff_Q4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q3 = atanf_t,atanf_coeff_Q7,atanf_coeff_Q6
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_z5 = atanf_z,atanf_z4,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q1 = atanf_t,atanf_coeff_Q9,atanf_coeff_Q8
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q4 = atanf_t,atanf_coeff_Q1,atanf_coeff_Q0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q5 = atanf_t,atanf_coeff_Q3,atanf_coeff_Q2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p4 = f8,atanf_coeff_P1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p5 = atanf_t,atanf_coeff_P4,atanf_coeff_P3
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_r1 = atanf_z8,atanf_poly_r1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_z8_bsq = atanf_z8,atanf_bsq,f0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q2 = atanf_tsq,atanf_poly_q3,atanf_poly_q2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_r2 = atanf_bsq,atanf_poly_r3,atanf_poly_r2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p2 = atanf_t,atanf_coeff_P8,atanf_coeff_P7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q1 = atanf_poly_q1,f1,atanf_tsq
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_z13 = atanf_z5,atanf_z8,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p1 = atanf_t,atanf_coeff_P10,atanf_coeff_P9
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p4 = atanf_t,atanf_poly_p4,f8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q4 = atanf_tsq,atanf_poly_q5,atanf_poly_q4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p3 = atanf_t,atanf_coeff_P6,atanf_coeff_P5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p5 = atanf_t,atanf_poly_p5,atanf_coeff_P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_x11 = atanf_x5,atanf_x6,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_r = atanf_z8_bsq,atanf_poly_r2,atanf_poly_r1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q2 = atanf_t4,atanf_poly_q1,atanf_poly_q2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p2
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p4 = atanf_x5,atanf_poly_p5,atanf_poly_p4
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_z21_poly_r = atanf_z13,atanf_poly_r,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_q = atanf_t4,atanf_poly_q2,atanf_poly_q4
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p3
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+//(atanf_pred_GT1) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
+(p7) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
+ nop.i 999;;
+}
+
+{ .mfb
+ nop.m 999
+//(atanf_pred_LE1) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
+(p6) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4
+ br.ret.sptk b0
+}
+
+
+
+L(ATANF_X_INF_NAN_ZERO):
+// Reached when the earlier fclass.m with mask 0xe7 (@inf|@qnan|@snan|@zero) set p8.
+ fclass.m p8,p9 = f8,0x23 // @inf: p8 = x is +/-inf, p9 = x is not
+;;
+(p8) fmerge.s f8 = f8, atanf_piby2 // inf only: sign of x with the magnitude of atanf_piby2
+;;
+ fnorm.s f8 = f8 // Normalize f8 to single precision (f8 is still x when p8 is false)
+ br.ret.sptk b0 // Return to the caller
+
+.endp atanf
+ASM_SIZE_DIRECTIVE(atanf)
diff --git a/sysdeps/ia64/fpu/s_atanl.S b/sysdeps/ia64/fpu/s_atanl.S
new file mode 100644
index 0000000..0192ac6
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_atanl.S
@@ -0,0 +1,1994 @@
+.file "atanl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+//
+// *********************************************************************
+//
+// History
+// 2/02/00 (hand-optimized)
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: atanl(x) = inverse tangent(x), for double extended x values
+// Function: atan2l(y,x) = atan(y/x), for double extended x values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9-f15
+// f32-f79
+//
+// General Purpose Registers:
+// r32-r48
+// r49,r50,r51,r52 (Arguments to error support for 0,0 case)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Underflow exceptions may occur
+// Special error handling for the y=0 and x=0 case
+// Inexact raised when appropriate by algorithm
+//
+// atanl(SNaN) = QNaN
+// atanl(QNaN) = QNaN
+// atanl(+/-0) = +/- 0
+// atanl(+/-Inf) = +/-pi/2
+//
+// atan2l(Any NaN for x or y) = QNaN
+// atan2l(+/-0,x) = +/-0 for x > 0
+// atan2l(+/-0,x) = +/-pi for x < 0
+// atan2l(+/-0,+0) = +/-0
+// atan2l(+/-0,-0) = +/-pi
+// atan2l(y,+/-0) = pi/2 y > 0
+// atan2l(y,+/-0) = -pi/2 y < 0
+// atan2l(+/-y, Inf) = +/-0 for finite y > 0
+// atan2l(+/-Inf, x) = +/-pi/2 for finite x
+// atan2l(+/-y, -Inf) = +/-pi for finite y > 0
+// atan2l(+/-Inf, Inf) = +/-pi/4
+// atan2l(+/-Inf, -Inf) = +/-3pi/4
+//
+// *********************************************************************
+//
+// Mathematical Description
+// ---------------------------
+//
+// The function ATANL( Arg_Y, Arg_X ) returns the "argument"
+// or the "phase" of the complex number
+//
+// Arg_X + i Arg_Y
+//
+// or equivalently, the angle in radians from the positive
+// x-axis to the line joining the origin and the point
+// (Arg_X,Arg_Y)
+//
+//
+// (Arg_X, Arg_Y) x
+// \
+// \
+// \
+// \
+// \ angle between is ATANL(Arg_Y,Arg_X)
+
+
+
+
+// \
+// ------------------> X-axis
+
+// Origin
+//
+// Moreover, this angle is reported in the range [-pi,pi] thus
+//
+// -pi <= ATANL( Arg_Y, Arg_X ) <= pi.
+//
+// From the geometry, it is easy to define ATANL when one of
+// Arg_X or Arg_Y is +-0 or +-inf:
+//
+//
+// \ Y |
+// X \ | +0 | -0 | +inf | -inf | finite non-zero
+// \ | | | | |
+// ______________________________________________________
+// | | | |
+// +-0 | Invalid/ | pi/2 | -pi/2 | sign(Y)*pi/2
+// | qNaN | | |
+// --------------------------------------------------------
+// | | | | |
+// +inf | +0 | -0 | pi/4 | -pi/4 | sign(Y)*0
+// --------------------------------------------------------
+// | | | | |
+// -inf | +pi | -pi | 3pi/4 | -3pi/4 | sign(Y)*pi
+// --------------------------------------------------------
+// finite | X>0? | pi/2 | -pi/2 | normal case
+// non-zero| sign(Y)*0: | | |
+// | sign(Y)*pi | | |
+//
+//
+// One must take note that ATANL is NOT the arctangent of the
+// value Arg_Y/Arg_X; but rather ATANL and arctan are related
+// in a slightly more complicated way as follows:
+//
+// Let U := max(|Arg_X|, |Arg_Y|); V := min(|Arg_X|, |Arg_Y|);
+// sign_X be the sign bit of Arg_X, i.e., sign_X is 0 or 1;
+// s_X be the sign of Arg_X, i.e., s_X = (-1)^sign_X;
+//
+// sign_Y be the sign bit of Arg_Y, i.e., sign_Y is 0 or 1;
+// s_Y be the sign of Arg_Y, i.e., s_Y = (-1)^sign_Y;
+//
+// swap be 0 if |Arg_X| >= |Arg_Y| and 1 otherwise.
+//
+// Then, ATANL(Arg_Y, Arg_X) =
+//
+// / arctan(V/U) \ sign_X = 0 & swap = 0
+// | pi/2 - arctan(V/U) | sign_X = 0 & swap = 1
+// s_Y * | |
+// | pi - arctan(V/U) | sign_X = 1 & swap = 0
+// \ pi/2 + arctan(V/U) / sign_X = 1 & swap = 1
+//
+//
+// This relationship also suggests that the algorithm's major
+// task is to calculate arctan(V/U) for 0 < V <= U; and the
+// final Result is given by
+//
+// s_Y * { (P_hi + P_lo) + sigma * arctan(V/U) }
+//
+// where
+//
+// (P_hi,P_lo) represents M(sign_X,swap)*(pi/2) accurately
+//
+// M(sign_X,swap) = 0 for sign_X = 0 and swap = 0
+// 1 for swap = 1
+// 2 for sign_X = 1 and swap = 0
+//
+// and
+//
+// sigma = { (sign_X XOR swap) : -1.0 : 1.0 }
+//
+// = (-1) ^ ( sign_X XOR swap )
+//
+// Both (P_hi,P_lo) and sigma can be stored in a table and fetched
+// using (sign_X,swap) as an index. (P_hi, P_lo) can be stored as a
+// double-precision, and single-precision pair; and sigma can
+// obviously be just a single-precision number.
+//
+// In the algorithm we propose, arctan(V/U) is calculated to high accuracy
+// as A_hi + A_lo. Consequently, the Result ATANL( Arg_Y, Arg_X ) is
+// given by
+//
+// s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
+//
+// We now discuss the calculation of arctan(V/U) for 0 < V <= U.
+//
+// For (V/U) < 2^(-3), we use a simple polynomial of the form
+//
+// z + z^3*(P_1 + z^2*(P_2 + z^2*(P_3 + ... + P_8)))
+//
+// where z = V/U.
+//
+// For the sake of accuracy, the first term "z" must approximate V/U to
+// extra precision. For z^3 and higher power, a working precision
+// approximation to V/U suffices. Thus, we obtain:
+//
+// z_hi + z_lo = V/U to extra precision and
+// z = V/U to working precision
+//
+// The value arctan(V/U) is delivered as two pieces (A_hi, A_lo)
+//
+// (A_hi,A_lo) = (z_hi, z^3*(P_1 + ... + P_8) + z_lo).
+//
+//
+// For 2^(-3) <= (V/U) <= 1, we use a table-driven approach.
+// Consider
+//
+// (V/U) = 2^k * 1.b_1 b_2 .... b_63 b_64 b_65 ....
+//
+// Define
+//
+// z_hi = 2^k * 1.b_1 b_2 b_3 b_4 1
+//
+// then
+// / \
+// | (V/U) - z_hi |
+
+// arctan(V/U) = arctan(z_hi) + arctan| -------------- |
+// | 1 + (V/U)*z_hi |
+// \ /
+//
+// / \
+// | V - z_hi*U |
+
+// = arctan(z_hi) + arctan| -------------- |
+// | U + V*z_hi |
+// \ /
+//
+// = arctan(z_hi) + arctan( V' / U' )
+//
+//
+// where
+//
+// V' = V - U*z_hi; U' = U + V*z_hi.
+//
+// Let
+//
+// w_hi + w_lo = V'/U' to extra precision and
+// w = V'/U' to working precision
+//
+// then we can approximate arctan(V'/U') by
+//
+// arctan(V'/U') = w_hi + w_lo
+// + w^3*(Q_1 + w^2*(Q_2 + w^2*(Q_3 + w^2*Q_4)))
+//
+// = w_hi + w_lo + poly
+//
+// Finally, arctan(z_hi) is calculated beforehand and stored in a table
+// as Tbl_hi, Tbl_lo. Thus,
+//
+// (A_hi, A_lo) = (Tbl_hi, w_hi+(poly+(w_lo+Tbl_lo)))
+//
+// This completes the mathematical description.
+//
+//
+// Algorithm
+// -------------
+//
+// Step 0. Check for unsupported format.
+//
+// If
+// ( expo(Arg_X) not zero AND msb(Arg_X) = 0 ) OR
+// ( expo(Arg_Y) not zero AND msb(Arg_Y) = 0 )
+//
+// then one of the arguments is unsupported. Generate an
+// invalid and return qNaN.
+//
+// Step 1. Initialize
+//
+// Normalize Arg_X and Arg_Y and set the following
+//
+// sign_X := sign_bit(Arg_X)
+// s_Y := (sign_bit(Arg_Y)==0? 1.0 : -1.0)
+// swap := (|Arg_X| >= |Arg_Y|? 0 : 1 )
+// U := max( |Arg_X|, |Arg_Y| )
+// V := min( |Arg_X|, |Arg_Y| )
+//
+// execute: frcpa E, pred, V, U
+// If pred is 0, go to Step 5 for special cases handling.
+//
+// Step 2. Decide on branch.
+//
+// Q := E * V
+// If Q < 2^(-3) go to Step 4 for simple polynomial case.
+//
+// Step 3. Table-driven algorithm.
+//
+// Q is represented as
+//
+// 2^(-k) * 1.b_1 b_2 b_3 ... b_63; k = 0,-1,-2,-3
+//
+// and that if k = 0, b_1 = b_2 = b_3 = b_4 = 0.
+//
+// Define
+//
+// z_hi := 2^(-k) * 1.b_1 b_2 b_3 b_4 1
+//
+// (note that there are 49 possible values of z_hi).
+//
+// ...We now calculate V' and U'. While V' is representable
+// ...as a 64-bit number because of cancellation, U' is
+// ...not in general a 64-bit number. Obtaining U' accurately
+// ...requires two working precision numbers
+//
+// U_prime_hi := U + V * z_hi ...WP approx. to U'
+// U_prime_lo := ( U - U_prime_hi ) + V*z_hi ...observe order
+// V_prime := V - U * z_hi ...this is exact
+//
+// C_hi := frcpa (1.0, U_prime_hi) ...C_hi approx 1/U'_hi
+//
+// loop 3 times
+// C_hi := C_hi + C_hi*(1.0 - C_hi*U_prime_hi)
+//
+// ...at this point C_hi is (1/U_prime_hi) to roughly 64 bits
+//
+// w_hi := V_prime * C_hi ...w_hi is V_prime/U_prime to
+// ...roughly working precision
+//
+// ...note that we want w_hi + w_lo to approximate
+// ...V_prime/(U_prime_hi + U_prime_lo) to extra precision
+// ...but for now, w_hi is good enough for the polynomial
+// ...calculation.
+//
+// wsq := w_hi*w_hi
+// poly := w_hi*wsq*(Q_1 + wsq*(Q_2 + wsq*(Q_3 + wsq*Q_4)))
+//
+// Fetch
+// (Tbl_hi, Tbl_lo) = atan(z_hi) indexed by (k,b_1,b_2,b_3,b_4)
+// ...Tbl_hi is a double-precision number
+// ...Tbl_lo is a single-precision number
+//
+// (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
+// ...as discussed previously. Again, the implementation can
+// ...choose to fetch P_hi and P_lo from a table indexed by
+// ...(sign_X, swap).
+// ...P_hi is a double-precision number;
+// ...P_lo is a single-precision number.
+//
+// ...calculate w_lo so that w_hi + w_lo is V'/U' accurately
+// w_lo := ((V_prime - w_hi*U_prime_hi) -
+// w_hi*U_prime_lo) * C_hi ...observe order
+//
+//
+// ...Ready to deliver arctan(V'/U') as A_hi, A_lo
+// A_hi := Tbl_hi
+// A_lo := w_hi + (poly + (Tbl_lo + w_lo)) ...observe order
+//
+// ...Deliver final Result
+// ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
+//
+// sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
+// ...sigma can be obtained by a table lookup using
+// ...(sign_X,swap) as index and stored as single precision
+// ...sigma should be calculated earlier
+//
+// P_hi := s_Y*P_hi
+// A_hi := s_Y*A_hi
+//
+// Res_hi := P_hi + sigma*A_hi ...this is exact because
+// ...both P_hi and Tbl_hi
+// ...are double-precision
+// ...and |Tbl_hi| > 2^(-4)
+// ...P_hi is either 0 or
+// ...between (1,4)
+//
+// Res_lo := sigma*A_lo + P_lo
+//
+// Return Res_hi + s_Y*Res_lo in user-defined rounding control
+//
+// Step 4. Simple polynomial case.
+//
+// ...E and Q are inherited from Step 2.
+//
+// A_hi := Q ...Q is inherited from Step 2 Q approx V/U
+//
+// loop 3 times
+// E := E + E*(1.0 - E*U)
+// ...at this point E approximates 1/U to roughly working precision
+//
+// z := V * E ...z approximates V/U to roughly working precision
+// zsq := z * z
+// z8 := zsq * zsq; z8 := z8 * z8
+//
+// poly1 := P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
+// poly2 := zsq*(P_1 + zsq*(P_2 + zsq*P_3))
+//
+// poly := poly1 + z8*poly2
+//
+// z_lo := (V - A_hi*U)*E
+//
+// A_lo := z*poly + z_lo
+// ...A_hi, A_lo approximate arctan(V/U) accurately
+//
+// (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
+// ...one can store the M(sign_X,swap) as single precision
+// ...values
+//
+// ...Deliver final Result
+// ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
+//
+// sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
+// ...sigma can be obtained by a table lookup using
+// ...(sign_X,swap) as index and stored as single precision
+// ...sigma should be calculated earlier
+//
+// P_hi := s_Y*P_hi
+// A_hi := s_Y*A_hi
+//
+// Res_hi := P_hi + sigma*A_hi ...need to compute
+// ...P_hi + sigma*A_hi
+// ...exactly
+//
+// tmp := (P_hi - Res_hi) + sigma*A_hi
+//
+// Res_lo := s_Y*(sigma*A_lo + P_lo) + tmp
+//
+// Return Res_hi + Res_lo in user-defined rounding control
+//
+// Step 5. Special Cases
+//
+// If pred is 0 where pred is obtained in
+// frcpa E, pred, V, U
+//
+// we are in one of those special cases of 0,+-inf or NaN
+//
+// If one of U and V is NaN, return U+V (which will generate
+// invalid in case one is a signaling NaN). Otherwise,
+// return the Result as described in the table
+//
+//
+//
+// \ Y |
+// X \ | +0 | -0 | +inf | -inf | finite non-zero
+// \ | | | | |
+// ______________________________________________________
+// | | | |
+// +-0 | Invalid/ | pi/2 | -pi/2 | sign(Y)*pi/2
+// | qNaN | | |
+// --------------------------------------------------------
+// | | | | |
+// +inf | +0 | -0 | pi/4 | -pi/4 | sign(Y)*0
+// --------------------------------------------------------
+// | | | | |
+// -inf | +pi | -pi | 3pi/4 | -3pi/4 | sign(Y)*pi
+// --------------------------------------------------------
+// finite | X>0? | pi/2 | -pi/2 |
+// non-zero| sign(Y)*0: | | | N/A
+// | sign(Y)*pi | | |
+//
+//
+
+#include "libm_support.h"
+
+ArgY_orig = f8 // incoming first argument (y for atan2l, x for atanl)
+Result = f8 // return value; same register as ArgY_orig
+FR_RESULT = f8 // alias of Result
+ArgX_orig = f9 // incoming second argument; atanl loads f1 here
+ArgX = f10 // working copy of ArgX_orig
+FR_X = f10 // alias of ArgX
+ArgY = f11 // working copy of ArgY_orig
+FR_Y = f11 // alias of ArgY
+s_Y = f12
+U = f13
+V = f14
+E = f15
+Q = f32
+z_hi = f33
+U_prime_hi = f34
+U_prime_lo = f35
+V_prime = f36
+C_hi = f37
+w_hi = f38
+w_lo = f39
+wsq = f40
+poly = f41
+Tbl_hi = f42
+Tbl_lo = f43
+P_hi = f44
+P_lo = f45
+A_hi = f46
+A_lo = f47
+sigma = f48
+Res_hi = f49
+Res_lo = f50
+Z = f52
+zsq = f53
+z8 = f54
+poly1 = f55
+poly2 = f56
+z_lo = f57
+tmp = f58
+P_1 = f59
+Q_1 = f60
+P_2 = f61
+Q_2 = f62
+P_3 = f63
+Q_3 = f64
+P_4 = f65
+Q_4 = f66
+P_5 = f67
+P_6 = f68
+P_7 = f69
+P_8 = f70
+TWO_TO_NEG3 = f71
+U_hold = f72
+C_hi_hold = f73
+E_hold = f74
+M = f75
+ArgX_abs = f76
+ArgY_abs = f77
+Result_lo = f78
+A_temp = f79
+GR_SAVE_PFS = r33
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+sign_X = r36
+sign_Y = r37
+swap = r38
+table_ptr1 = r39
+table_ptr2 = r40
+k = r41
+lookup = r42
+exp_ArgX = r43
+exp_ArgY = r44
+exponent_Q = r45
+significand_Q = r46
+special = r47
+special1 = r48
+GR_Parameter_X = r49
+GR_Parameter_Y = r50
+GR_Parameter_RESULT = r51
+GR_Parameter_TAG = r52
+int_temp = r52 // shares r52 with GR_Parameter_TAG
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 64
+
+Constants_atan:
+ASM_TYPE_DIRECTIVE(Constants_atan,@object)
+data4 0x54442D18, 0x3FF921FB, 0x248D3132, 0x3E000000
+// double pi/2 (loaded as P_hi), single lo_pi/2 (P_lo), single two**(-3) (TWO_TO_NEG3)
+data4 0xAAAAAAA3, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // P_1
+data4 0xCCCC54B2, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // P_2
+data4 0x47E4D0C2, 0x92492492, 0x0000BFFC, 0x00000000 // P_3
+data4 0x58870889, 0xE38E38E0, 0x00003FFB, 0x00000000 // P_4
+data4 0x290149F8, 0xBA2E895B, 0x0000BFFB, 0x00000000 // P_5
+data4 0x250F733D, 0x9D88E6D4, 0x00003FFB, 0x00000000 // P_6
+data4 0xFB8745A0, 0x884E51FF, 0x0000BFFB, 0x00000000 // P_7
+data4 0x394396BD, 0xE1C7412B, 0x00003FFA, 0x00000000 // P_8 (byte offset 128 from Constants_atan)
+data4 0xAAAAA52F, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // Q_1
+data4 0xC75B60D3, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // Q_2
+data4 0x011F1940, 0x924923AD, 0x0000BFFC, 0x00000000 // Q_3
+data4 0x2A5F89BD, 0xE36F716D, 0x00003FFB, 0x00000000 // Q_4 (byte offset 192 from Constants_atan)
+//
+// Entries Tbl_hi (double precision)
+// B = 1+Index/16+1/32 Index = 0
+// Entries Tbl_lo (single precision)
+// B = 1+Index/16+1/32 Index = 0
+//
+data4 0xA935BD8E, 0x3FE9A000, 0x23ACA08F, 0x00000000
+//
+// Entries Tbl_hi (double precision) Index = 0,1,...,15
+// B = 2^(-1)*(1+Index/16+1/32)
+// Entries Tbl_lo (single precision)
+// Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32)
+//
+data4 0x7F175A34, 0x3FDE77EB, 0x238729EE, 0x00000000
+data4 0x73C1A40B, 0x3FE0039C, 0x249334DB, 0x00000000
+data4 0x5B5B43DA, 0x3FE0C614, 0x22CBA7D1, 0x00000000
+data4 0x88BE7C13, 0x3FE1835A, 0x246310E7, 0x00000000
+data4 0xE2CC9E6A, 0x3FE23B71, 0x236210E5, 0x00000000
+data4 0x8406CBCA, 0x3FE2EE62, 0x2462EAF5, 0x00000000
+data4 0x1CD41719, 0x3FE39C39, 0x24B73EF3, 0x00000000
+data4 0x5B795B55, 0x3FE44506, 0x24C11260, 0x00000000
+data4 0x5BB6EC04, 0x3FE4E8DE, 0x242519EE, 0x00000000
+data4 0x1F732FBA, 0x3FE587D8, 0x24D4346C, 0x00000000
+data4 0x115D7B8D, 0x3FE6220D, 0x24ED487B, 0x00000000
+data4 0x920B3D98, 0x3FE6B798, 0x2495FF1E, 0x00000000
+data4 0x8FBA8E0F, 0x3FE74897, 0x223D9531, 0x00000000
+data4 0x289FA093, 0x3FE7D528, 0x242B0411, 0x00000000
+data4 0x576CC2C5, 0x3FE85D69, 0x2335B374, 0x00000000
+data4 0xA99CC05D, 0x3FE8E17A, 0x24C27CFB, 0x00000000
+//
+// Entries Tbl_hi (double precision) Index = 0,1,...,15
+// B = 2^(-2)*(1+Index/16+1/32)
+// Entries Tbl_lo (single precision)
+// Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32)
+//
+data4 0x510665B5, 0x3FD025FA, 0x24263482, 0x00000000
+data4 0x362431C9, 0x3FD1151A, 0x242C8DC9, 0x00000000
+data4 0x67E47C95, 0x3FD20255, 0x245CF9BA, 0x00000000
+data4 0x7A823CFE, 0x3FD2ED98, 0x235C892C, 0x00000000
+data4 0x29271134, 0x3FD3D6D1, 0x2389BE52, 0x00000000
+data4 0x586890E6, 0x3FD4BDEE, 0x24436471, 0x00000000
+data4 0x175E0F4E, 0x3FD5A2E0, 0x2389DBD4, 0x00000000
+data4 0x9F5FA6FD, 0x3FD68597, 0x2476D43F, 0x00000000
+data4 0x52817501, 0x3FD76607, 0x24711774, 0x00000000
+data4 0xB8DF95D7, 0x3FD84422, 0x23EBB501, 0x00000000
+data4 0x7CD0C662, 0x3FD91FDE, 0x23883A0C, 0x00000000
+data4 0x66168001, 0x3FD9F930, 0x240DF63F, 0x00000000
+data4 0x5422058B, 0x3FDAD00F, 0x23FE261A, 0x00000000
+data4 0x378624A5, 0x3FDBA473, 0x23A8CD0E, 0x00000000
+data4 0x0AAD71F8, 0x3FDC7655, 0x2422D1D0, 0x00000000
+data4 0xC9EC862B, 0x3FDD45AE, 0x2344A109, 0x00000000
+//
+// Entries Tbl_hi (double precision) Index = 0,1,...,15
+// B = 2^(-3)*(1+Index/16+1/32)
+// Entries Tbl_lo (single precision)
+// Index = 0,1,...,15 B = 2^(-3)*(1+Index/16+1/32)
+//
+data4 0x84212B3D, 0x3FC068D5, 0x239874B6, 0x00000000
+data4 0x41060850, 0x3FC16465, 0x2335E774, 0x00000000
+data4 0x171A535C, 0x3FC25F6E, 0x233E36BE, 0x00000000
+data4 0xEDEB99A3, 0x3FC359E8, 0x239680A3, 0x00000000
+data4 0xC6092A9E, 0x3FC453CE, 0x230FB29E, 0x00000000
+data4 0xBA11570A, 0x3FC54D18, 0x230C1418, 0x00000000
+data4 0xFFB3AA73, 0x3FC645BF, 0x23F0564A, 0x00000000
+data4 0xE8A7D201, 0x3FC73DBD, 0x23D4A5E1, 0x00000000
+data4 0xE398EBC7, 0x3FC8350B, 0x23D4ADDA, 0x00000000
+data4 0x7D050271, 0x3FC92BA3, 0x23BCB085, 0x00000000
+data4 0x601081A5, 0x3FCA217E, 0x23BC841D, 0x00000000
+data4 0x574D780B, 0x3FCB1696, 0x23CF4A8E, 0x00000000
+data4 0x4D768466, 0x3FCC0AE5, 0x23BECC90, 0x00000000
+data4 0x4E1D5395, 0x3FCCFE65, 0x2323DCD2, 0x00000000
+data4 0x864C9D9D, 0x3FCDF110, 0x23F53F3A, 0x00000000
+data4 0x451D980C, 0x3FCEE2E1, 0x23CCB11F, 0x00000000
+
+data4 0x54442D18, 0x400921FB, 0x33145C07, 0x3CA1A626 // PI two doubles
+data4 0x54442D18, 0x3FF921FB, 0x33145C07, 0x3C91A626 // PI_by_2 two dbles
+data4 0x54442D18, 0x3FE921FB, 0x33145C07, 0x3C81A626 // PI_by_4 two dbles
+data4 0x7F3321D2, 0x4002D97C, 0x4C9E8A0A, 0x3C9A7939 // 3PI_by_4 two dbles
+ASM_SIZE_DIRECTIVE(Constants_atan)
+
+
+.text
+.proc atanl#
+.global atanl#
+.align 64
+
+atanl: // atanl(x): evaluated as atan2l(x, 1.0) by the shared atan2l body
+{ .mfb
+ nop.m 999
+(p0) mov ArgX_orig = f1 // second argument := f1 (1.0); f8 already holds x
+(p0) br.cond.sptk atan2l ;; // branch, not a call: atan2l's br.ret b0 returns to atanl's caller
+}
+.endp atanl
+ASM_SIZE_DIRECTIVE(atanl)
+
+.text
+.proc atan2l#
+.global atan2l#
+#ifdef _LIBC
+.proc __atan2l#
+.global __atan2l#
+.proc __ieee754_atan2l#
+.global __ieee754_atan2l#
+#endif
+.align 64
+
+
+atan2l:
+#ifdef _LIBC
+__atan2l:
+__ieee754_atan2l:
+#endif
+{ .mfi
+alloc r32 = ar.pfs, 0, 17 , 4, 0
+(p0) mov ArgY = ArgY_orig
+}
+{ .mfi
+ nop.m 999
+(p0) mov ArgX = ArgX_orig
+ nop.i 999
+};;
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = ArgY_orig, 0x103
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+//
+// Save original input args and load table ptr.
+//
+(p0) fclass.m.unc p6,p0 = ArgX_orig, 0x103
+ nop.i 999
+};;
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(Constants_atan#), gp
+(p0) fclass.m.unc p0,p9 = ArgY_orig, 0x1FF
+ nop.i 999 ;;
+}
+{ .mfi
+ ld8 table_ptr1 = [table_ptr1]
+(p0) fclass.m.unc p0,p8 = ArgX_orig, 0x1FF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p13,p0 = ArgY_orig, 0x0C3
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) fclass.m.unc p12,p0 = ArgX_orig, 0x0C3
+ nop.i 999
+}
+
+
+//
+// Check for NatVals.
+// Check for everything - if false, then must be pseudo-zero
+// or pseudo-nan (IA unsupporteds).
+//
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(ATANL_NATVAL) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p7) br.cond.spnt L(ATANL_NATVAL) ;;
+}
+{ .mib
+(p0) ldfd P_hi = [table_ptr1],8
+ nop.i 999
+(p8) br.cond.spnt L(ATANL_UNSUPPORTED) ;;
+}
+{ .mbb
+(p0) add table_ptr2 = 96, table_ptr1
+(p9) br.cond.spnt L(ATANL_UNSUPPORTED)
+//
+// Load double precision high-order part of pi
+//
+(p12) br.cond.spnt L(ATANL_NAN) ;;
+}
+{ .mfb
+ nop.m 999
+(p0) fnorm.s1 ArgX = ArgX
+(p13) br.cond.spnt L(ATANL_NAN) ;;
+}
+//
+// Normalize the input argument.
+// Branch out if NaN inputs
+//
+{ .mmf
+(p0) ldfs P_lo = [table_ptr1], 4
+ nop.m 999
+(p0) fnorm.s1 ArgY = ArgY ;;
+}
+{ .mmf
+ nop.m 999
+(p0) ldfs TWO_TO_NEG3 = [table_ptr1], 180
+//
+// U = max(ArgX_abs,ArgY_abs)
+// V = min(ArgX_abs,ArgY_abs)
+// if PR1, swap = 0
+// if PR2, swap = 1
+//
+(p0) mov M = f1 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Get exp and sign of ArgX
+// Get exp and sign of ArgY
+// Load 2**(-3) and increment ptr to Q_4.
+//
+(p0) fmerge.s ArgX_abs = f1, ArgX
+ nop.i 999 ;;
+}
+//
+// load single precision low-order part of pi = P_lo
+//
+{ .mfi
+(p0) getf.exp sign_X = ArgX
+(p0) fmerge.s ArgY_abs = f1, ArgY
+ nop.i 999 ;;
+}
+{ .mii
+(p0) getf.exp sign_Y = ArgY
+ nop.i 999 ;;
+(p0) shr sign_X = sign_X, 17 ;;
+}
+{ .mii
+ nop.m 999
+(p0) shr sign_Y = sign_Y, 17 ;;
+(p0) cmp.eq.unc p8, p9 = 0x00000, sign_Y ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Is ArgX_abs >= ArgY_abs
+// Is sign_Y == 0?
+//
+(p0) fmax.s1 U = ArgX_abs, ArgY_abs
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// ArgX_abs = |ArgX|
+// ArgY_abs = |ArgY|
+// sign_X is sign bit of ArgX
+// sign_Y is sign bit of ArgY
+//
+(p0) fcmp.ge.s1 p6, p7 = ArgX_abs, ArgY_abs
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmin.s1 V = ArgX_abs, ArgY_abs
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fadd.s1 s_Y = f0, f1
+(p6) cmp.eq.unc p10, p11 = 0x00000, sign_X
+}
+{ .mii
+(p6) add swap = r0, r0
+ nop.i 999 ;;
+(p7) add swap = 1, r0
+}
+{ .mfi
+ nop.m 999
+//
+// Let M = 1.0
+// if p8, s_Y = 1.0
+// if p9, s_Y = -1.0
+//
+(p10) fsub.s1 M = M, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 s_Y = f0, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) frcpa.s1 E, p6 = V, U
+ nop.i 999 ;;
+}
+{ .mbb
+ nop.m 999
+//
+// E = frcpa(V,U)
+//
+(p6) br.cond.sptk L(ATANL_STEP2)
+(p0) br.cond.spnt L(ATANL_SPECIAL_HANDLING) ;;
+}
+L(ATANL_STEP2):
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 Q = E, V
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.s0 p0, p9 = f1, ArgY_orig
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Is Q < 2**(-3)?
+//
+(p0) fcmp.eq.s0 p0, p8 = f1, ArgX_orig
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 M = M, f1
+ nop.i 999 ;;
+}
+{ .mlx
+ nop.m 999
+// *************************************************
+// ********************* STEP2 *********************
+// *************************************************
+(p0) movl special = 0x8400000000000000
+}
+{ .mlx
+ nop.m 999
+//
+// lookup = b_1 b_2 b_3 B_4
+//
+(p0) movl special1 = 0x0000000000000100 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Do fnorms to raise any denormal operand
+// exceptions.
+//
+(p0) fmpy.s1 P_hi = M, P_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 P_lo = M, P_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Q = E * V
+//
+(p0) fcmp.lt.unc.s1 p6, p7 = Q, TWO_TO_NEG3
+ nop.i 999 ;;
+}
+{ .mmb
+(p0) getf.sig significand_Q = Q
+(p0) getf.exp exponent_Q = Q
+ nop.b 999 ;;
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) andcm k = 0x0003, exponent_Q
+(p0) extr.u lookup = significand_Q, 59, 4 ;;
+}
+{ .mib
+ nop.m 999
+(p0) dep special = lookup, special, 59, 4
+//
+// Generate 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+//
+(p6) br.cond.spnt L(ATANL_POLY) ;;
+}
+{ .mfi
+(p0) cmp.eq.unc p8, p9 = 0x0000, k
+(p0) fmpy.s1 P_hi = s_Y, P_hi
+//
+// We waited a few extra cycles so P_lo and P_hi could be calculated.
+// Load the constant 256 for loading up table entries.
+//
+// *************************************************
+// ******************** STEP3 **********************
+// *************************************************
+(p0) add table_ptr2 = 16, table_ptr1
+}
+//
+// Let z_hi have exponent and sign of original Q
+// Load the Tbl_hi(0) else, increment pointer.
+//
+{ .mii
+(p0) ldfe Q_4 = [table_ptr1], -16
+(p0) xor swap = sign_X, swap ;;
+(p9) sub k = k, r0, 1
+}
+{ .mmi
+(p0) setf.sig z_hi = special
+(p0) ldfe Q_3 = [table_ptr1], -16
+(p9) add table_ptr2 = 16, table_ptr2 ;;
+}
+//
+// U_hold = U - U_prime_hi
+// k = k * 256 - Result can be 0, 256, or 512.
+//
+{ .mmb
+(p0) ldfe Q_2 = [table_ptr1], -16
+(p8) ldfd Tbl_hi = [table_ptr2], 8
+ nop.b 999 ;;
+}
+//
+// U_prime_lo = U_hold + V * z_hi
+// lookup -> lookup * 16 + k
+//
+{ .mmi
+(p0) ldfe Q_1 = [table_ptr1], -16 ;;
+(p8) ldfs Tbl_lo = [table_ptr2], 8
+//
+// U_prime_hi = U + V * z_hi
+// Load the Tbl_lo(0)
+//
+(p9) pmpy2.r k = k, special1 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p9) shladd lookup = lookup, 0x0004, k ;;
+}
+{ .mmi
+(p9) add table_ptr2 = table_ptr2, lookup ;;
+//
+// V_prime = V - U * z_hi
+//
+(p9) ldfd Tbl_hi = [table_ptr2], 8
+ nop.i 999 ;;
+}
+{ .mmf
+ nop.m 999
+//
+// C_hi = frcpa(1,U_prime_hi)
+//
+(p9) ldfs Tbl_lo = [table_ptr2], 8
+//
+// z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+// Point to beginning of Tbl_hi entries - k = 0.
+//
+(p0) fmerge.se z_hi = Q, z_hi ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 U_prime_hi = V, z_hi, U
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 V_prime = U, z_hi, V
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) mov A_hi = Tbl_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 U_hold = U, U_prime_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) frcpa.s1 C_hi, p6 = f1, U_prime_hi
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) cmp.eq.unc p7, p6 = 0x00000, swap
+(p0) fmpy.s1 A_hi = s_Y, A_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = wsq * poly
+//
+(p7) fadd.s1 sigma = f0, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 U_prime_lo = z_hi, V, U_hold
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fsub.s1 sigma = f0, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// A_lo = A_lo + w_hi
+// A_hi = s_Y * A_hi
+//
+(p0) fma.s1 Res_hi = sigma, A_hi, P_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi_hold = 1 - C_hi * U_prime_hi (1)
+//
+(p0) fma.s1 C_hi = C_hi_hold, C_hi, C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi = C_hi + C_hi * C_hi_hold (1)
+//
+(p0) fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi_hold = 1 - C_hi * U_prime_hi (2)
+//
+(p0) fma.s1 C_hi = C_hi_hold, C_hi, C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi = C_hi + C_hi * C_hi_hold (2)
+//
+(p0) fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi_hold = 1 - C_hi * U_prime_hi (3)
+//
+(p0) fma.s1 C_hi = C_hi_hold, C_hi, C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi = C_hi + C_hi * C_hi_hold (3)
+//
+(p0) fmpy.s1 w_hi = V_prime, C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// w_hi = V_prime * C_hi
+//
+(p0) fmpy.s1 wsq = w_hi, w_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 w_lo = w_hi, U_prime_hi, V_prime
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// wsq = w_hi * w_hi
+// w_lo = = V_prime - w_hi * U_prime_hi
+//
+(p0) fma.s1 poly = wsq, Q_4, Q_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 w_lo = w_hi, U_prime_lo, w_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = Q_3 + wsq * Q_4
+// w_lo = = w_lo - w_hi * U_prime_lo
+//
+(p0) fma.s1 poly = wsq, poly, Q_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 w_lo = C_hi, w_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = Q_2 + wsq * poly
+// w_lo = = w_lo * C_hi
+//
+(p0) fma.s1 poly = wsq, poly, Q_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 A_lo = Tbl_lo, w_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Result = Res_hi + Res_lo * s_Y (User Supplied Rounding Mode)
+//
+(p0) fmpy.s0 Q_1 = Q_1, Q_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = Q_1 + wsq * poly
+// A_lo = Tbl_lo + w_lo
+// swap = xor(swap,sign_X)
+//
+(p0) fmpy.s1 poly = wsq, poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Is (swap) != 0 ?
+// poly = wsq * poly
+// A_hi = Tbl_hi
+//
+(p0) fmpy.s1 poly = w_hi, poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (PR_1) sigma = -1.0
+// if (PR_2) sigma = 1.0
+//
+(p0) fadd.s1 A_lo = A_lo, poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// P_hi = s_Y * P_hi
+// A_lo = A_lo + poly
+//
+(p0) fadd.s1 A_lo = A_lo, w_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 Res_lo = sigma, A_lo, P_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Res_hi = P_hi + sigma * A_hi
+// Res_lo = P_lo + sigma * A_lo
+//
+(p0) fma.s0 Result = Res_lo, s_Y, Res_hi
+//
+// Raise inexact.
+//
+br.ret.sptk b0 ;;
+}
+//
+// poly1 = P_5 + zsq * poly1
+// poly2 = zsq * poly2
+//
+L(ATANL_POLY):
+{ .mmf
+(p0) xor swap = sign_X, swap
+ nop.m 999
+(p0) fnma.s1 E_hold = E, U, f1 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) mov A_temp = Q
+//
+// poly1 = P_4 + zsq * poly1
+// swap = xor(swap,sign_X)
+//
+// sign_X gr_002
+// swap gr_004
+// poly1 = poly1 <== Done with poly1
+// poly1 = P_4 + zsq * poly1
+// swap = xor(swap,sign_X)
+//
+(p0) cmp.eq.unc p7, p6 = 0x00000, swap
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 P_hi = s_Y, P_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p6) fsub.s1 sigma = f0, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fadd.s1 sigma = f0, f1
+ nop.i 999 ;;
+}
+
+// ***********************************************
+// ******************** STEP4 ********************
+// ***********************************************
+
+{ .mmi
+ nop.m 999
+(p0) addl table_ptr1 = @ltoff(Constants_atan#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 E = E, E_hold, E
+//
+// Following:
+// Iterate 3 times E = E + E*(1.0 - E*U)
+// Also load P_8, P_7, P_6, P_5, P_4
+// E_hold = 1.0 - E * U (1)
+// A_temp = Q
+//
+(p0) add table_ptr1 = 128, table_ptr1 ;;
+}
+{ .mmf
+ nop.m 999
+//
+// E = E + E_hold*E (1)
+// Point to P_8.
+//
+(p0) ldfe P_8 = [table_ptr1], -16
+//
+// poly = z8*poly1 + poly2 (Typo in writeup)
+// Is (swap) != 0 ?
+//
+(p0) fnma.s1 z_lo = A_temp, U, V ;;
+}
+{ .mmb
+ nop.m 999
+//
+// E_hold = 1.0 - E * U (2)
+//
+(p0) ldfe P_7 = [table_ptr1], -16
+ nop.b 999 ;;
+}
+{ .mmb
+ nop.m 999
+//
+// E = E + E_hold*E (2)
+//
+(p0) ldfe P_6 = [table_ptr1], -16
+ nop.b 999 ;;
+}
+{ .mmb
+ nop.m 999
+//
+// E_hold = 1.0 - E * U (3)
+//
+(p0) ldfe P_5 = [table_ptr1], -16
+ nop.b 999 ;;
+}
+{ .mmf
+ nop.m 999
+//
+// E = E + E_hold*E (3)
+//
+//
+// At this point E approximates 1/U to roughly working precision
+// z = V*E approximates V/U
+//
+(p0) ldfe P_4 = [table_ptr1], -16
+(p0) fnma.s1 E_hold = E, U, f1 ;;
+}
+{ .mmb
+ nop.m 999
+//
+// Z = V * E
+//
+(p0) ldfe P_3 = [table_ptr1], -16
+ nop.b 999 ;;
+}
+{ .mmb
+ nop.m 999
+//
+// zsq = Z * Z
+//
+(p0) ldfe P_2 = [table_ptr1], -16
+ nop.b 999 ;;
+}
+{ .mmb
+ nop.m 999
+//
+// z8 = zsq * zsq
+//
+(p0) ldfe P_1 = [table_ptr1], -16
+ nop.b 999 ;;
+}
+{ .mlx
+ nop.m 999
+(p0) movl int_temp = 0x24005
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 E = E, E_hold, E
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 E_hold = E, U, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 E = E, E_hold, E
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 Z = V, E
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// z_lo = V - A_temp * U
+// if (PR_2) sigma = 1.0
+//
+(p0) fmpy.s1 z_lo = z_lo, E
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 zsq = Z, Z
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// z_lo = z_lo * E
+// if (PR_1) sigma = -1.0
+//
+(p0) fadd.s1 A_hi = A_temp, z_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// z8 = z8 * z8
+//
+//
+// Now what we want to do is
+// poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
+// poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
+//
+(p0) fma.s1 poly1 = zsq, P_8, P_7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 poly2 = zsq, P_3, P_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 z8 = zsq, zsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 A_temp = A_temp, A_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// A_lo = Z * poly + z_lo
+//
+(p0) fmerge.s tmp = A_hi, A_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly1 = P_7 + zsq * P_8
+// poly2 = P_2 + zsq * P_3
+//
+(p0) fma.s1 poly1 = zsq, poly1, P_6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 poly2 = zsq, poly2, P_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 z8 = z8, z8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 z_lo = A_temp, z_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly1 = P_6 + zsq * poly1
+// poly2 = P_2 + zsq * poly2
+//
+(p0) fma.s1 poly1 = zsq, poly1, P_5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 poly2 = poly2, zsq
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Result = Res_hi + Res_lo (User Supplied Rounding Mode)
+//
+(p0) fmpy.s1 P_5 = P_5, P_5
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 poly1 = zsq, poly1, P_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 poly = z8, poly1, poly2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Fixup added to force inexact later -
+// A_hi = A_temp + z_lo
+// z_lo = (A_temp - A_hi) + z_lo
+//
+(p0) fma.s1 A_lo = Z, poly, z_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 A_hi = tmp, A_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 tmp = tmp, A_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 A_hi = s_Y, A_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 A_lo = tmp, A_lo
+ nop.i 999
+}
+{ .mfi
+(p0) setf.exp tmp = int_temp
+//
+// P_hi = s_Y * P_hi
+// A_hi = s_Y * A_hi
+//
+(p0) fma.s1 Res_hi = sigma, A_hi, P_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6,p0 = A_lo, 0x007
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p6) mov A_lo = tmp
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Res_hi = P_hi + sigma * A_hi
+//
+(p0) fsub.s1 tmp = P_hi, Res_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// tmp = P_hi - Res_hi
+//
+(p0) fma.s1 tmp = A_hi, sigma, tmp
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 sigma = A_lo, sigma, P_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// tmp = sigma * A_hi + tmp
+// sigma = A_lo * sigma + P_lo
+//
+(p0) fma.s1 Res_lo = s_Y, sigma, tmp
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Res_lo = s_Y * sigma + tmp
+//
+(p0) fadd.s0 Result = Res_lo, Res_hi
+br.ret.sptk b0 ;;
+}
+L(ATANL_NATVAL):
+L(ATANL_UNSUPPORTED):
+L(ATANL_NAN):
+{ .mfb
+ nop.m 999
+(p0) fmpy.s0 Result = ArgX,ArgY
+(p0) br.ret.sptk b0 ;;
+}
+L(ATANL_SPECIAL_HANDLING):
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.s0 p0, p6 = f1, ArgY_orig
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.s0 p0, p5 = f1, ArgX_orig
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p7 = ArgY, 0x007
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl special = 992
+}
+;;
+
+
+{ .mmi
+ nop.m 999
+(p0) addl table_ptr1 = @ltoff(Constants_atan#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mib
+(p0) add table_ptr1 = table_ptr1, special
+ nop.i 999
+(p7) br.cond.spnt L(ATANL_ArgY_Not_ZERO) ;;
+}
+{ .mmf
+(p0) ldfd Result = [table_ptr1], 8
+ nop.m 999
+(p6) fclass.m.unc p14, p0 = ArgX, 0x035 ;;
+}
+{ .mmf
+ nop.m 999
+(p0) ldfd Result_lo = [table_ptr1], -8
+(p6) fclass.m.unc p15, p0 = ArgX, 0x036 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) fmerge.s Result = ArgY, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p13, p0 = ArgX, 0x007
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) fmerge.s Result_lo = ArgY, f0
+ nop.i 999 ;;
+}
+{ .mfi
+(p13) mov GR_Parameter_TAG = 36
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Return sign_Y * 0 when ArgX > +0
+//
+(p15) fmerge.s Result = ArgY, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p15) fmerge.s Result_lo = ArgY, Result_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Return sign_Y * 0 when ArgX < -0
+//
+(p0) fadd.s0 Result = Result, Result_lo
+(p13) br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Call error support function for atan(0,0)
+//
+(p0) br.ret.sptk b0 ;;
+}
+L(ATANL_ArgY_Not_ZERO):
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p9, p10 = ArgY, 0x023
+ nop.i 999 ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt L(ATANL_ArgY_Not_INF) ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fclass.m.unc p6, p0 = ArgX, 0x017
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fclass.m.unc p7, p0 = ArgX, 0x021
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fclass.m.unc p8, p0 = ArgX, 0x022
+ nop.i 999 ;;
+}
+{ .mmi
+(p6) add table_ptr1 = 16, table_ptr1 ;;
+(p0) ldfd Result = [table_ptr1], 8
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfd Result_lo = [table_ptr1], -8
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p6) fmerge.s Result = ArgY, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p6) fmerge.s Result_lo = ArgY, Result_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p6) fadd.s0 Result = Result, Result_lo
+(p6) br.ret.sptk b0 ;;
+}
+//
+// Load PI/2 and adjust its sign.
+// Return +PI/2 when ArgY = +Inf and ArgX = +/-0 or normal
+// Return -PI/2 when ArgY = -Inf and ArgX = +/-0 or normal
+//
+{ .mmi
+(p7) add table_ptr1 = 32, table_ptr1 ;;
+(p7) ldfd Result = [table_ptr1], 8
+ nop.i 999 ;;
+}
+{ .mfi
+(p7) ldfd Result_lo = [table_ptr1], -8
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fmerge.s Result = ArgY, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fmerge.s Result_lo = ArgY, Result_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p7) fadd.s0 Result = Result, Result_lo
+(p7) br.ret.sptk b0 ;;
+}
+//
+// Load PI/4 and adjust its sign.
+// Return +PI/4 when ArgY = +Inf and ArgX = +Inf
+// Return -PI/4 when ArgY = -Inf and ArgX = +Inf
+//
+{ .mmi
+(p8) add table_ptr1 = 48, table_ptr1 ;;
+(p8) ldfd Result = [table_ptr1], 8
+ nop.i 999 ;;
+}
+{ .mfi
+(p8) ldfd Result_lo = [table_ptr1], -8
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fmerge.s Result = ArgY, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fmerge.s Result_lo = ArgY, Result_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p8) fadd.s0 Result = Result, Result_lo
+(p8) br.ret.sptk b0 ;;
+}
+L(ATANL_ArgY_Not_INF):
+{ .mfi
+ nop.m 999
+//
+// Load PI/4 and adjust its sign.
+// Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
+// Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
+//
+(p0) fclass.m.unc p6, p0 = ArgX, 0x007
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7, p0 = ArgX, 0x021
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p8, p0 = ArgX, 0x022
+ nop.i 999 ;;
+}
+{ .mmi
+(p6) add table_ptr1 = 16, table_ptr1 ;;
+(p6) ldfd Result = [table_ptr1], 8
+ nop.i 999 ;;
+}
+{ .mfi
+(p6) ldfd Result_lo = [table_ptr1], -8
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p6) fmerge.s Result = ArgY, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p6) fmerge.s Result_lo = ArgY, Result_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p6) fadd.s0 Result = Result, Result_lo
+(p6) br.ret.spnt b0 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// return = sign_Y * PI/2 when ArgX = 0
+//
+(p7) fmerge.s Result = ArgY, f0
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p7) fnorm.s0 Result = Result
+(p7) br.ret.spnt b0 ;;
+}
+//
+// return = sign_Y * 0 when ArgX = Inf
+//
+{ .mmi
+(p8) ldfd Result = [table_ptr1], 8 ;;
+(p8) ldfd Result_lo = [table_ptr1], -8
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fmerge.s Result = ArgY, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fmerge.s Result_lo = ArgY, Result_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p8) fadd.s0 Result = Result, Result_lo
+(p8) br.ret.sptk b0 ;;
+}
+//
+// return = sign_Y * PI when ArgX = -Inf
+//
+.endp atan2l
+ASM_SIZE_DIRECTIVE(atan2l)
+ASM_SIZE_DIRECTIVE(__atan2l)
+ASM_SIZE_DIRECTIVE(__ieee754_atan2l)
+
+.proc __libm_error_region // Error path: spill FR_Y, FR_X, FR_RESULT to a 64-byte frame, call __libm_error_support, reload the result into f8
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32, i.e. new sp+32 once the frame is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; post-increment leaves GR_Parameter_Y at sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0 // Branch slot filler
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address (sp+48) after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_cbrt.S b/sysdeps/ia64/fpu/s_cbrt.S
new file mode 100644
index 0000000..cb17c46
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_cbrt.S
@@ -0,0 +1,676 @@
+.file "cbrt.asm"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang
+// of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 5/19/00: New version (modified algorithm)
+//
+// API
+//==============================================================
+// double cbrt(double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// cbrt(a) = cbrt(a y) / cbrt(y)
+// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+//
+// where y = frcpa(a).
+//
+// * cbrt(1 - (1 - a y)) is approximated by a degree-5 polynomial
+//
+// 1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
+//
+// in r = 1 - a y.
+//
+// * The values 1/cbrt(y) are stored in a table of constants T0
+// to 64 bits of accuracy
+//
+// The table values are stored for three exponent values (e mod 3) and are
+// then multiplied by 2^floor(e/3), where e is the exponent of the input number.
+// This computation is carried out in parallel with the polynomial
+// evaluation:
+//
+// T = 2^(e/3) * T0
+
+
+
+
+
+//===============
+// input = x
+// C = frcpa(x)
+// r = 1 - C * x
+//
+// Special values
+//==============================================================
+
+
+
+// Registers used
+//==============================================================
+// f6-f15
+// r2-r3, r23-r26, r28-r30
+// p6,p7,p8,p12
+
+#include "libm_support.h"
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+poly_coeffs:
+ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
+data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // C_1 = 1/3 (extended format: significand, then sign/exponent; read with ldfe)
+data8 0x3fbc71c71c71c71d, 0x3faf9add3c0ca459 // C_2 = 1/9, C_3 = 5/81 (doubles, read as a pair with ldfpd)
+data8 0x3fa511e8d2b3183b, 0x3f9ee7113506ac13 // C_4 = 10/243, C_5 = 22/729 (doubles, read as a pair with ldfpd)
+ASM_SIZE_DIRECTIVE(poly_coeffs)
+
+T_table:
+ASM_TYPE_DIRECTIVE(T_table,@object)
+
+data8 0x80155c748c374836, 0xa160019ed37fb4ae
+data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
+data8 0xa1960b5966da4608, 0xcb95f333968ad59b
+data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
+data8 0xcbda64292d3ffd97, 0x8096b586974669b1
+data8 0xa202f97995b69c0d, 0xcc1f3184af961596
+data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
+data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
+data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
+data8 0x81149add67c2d208, 0xa2a197e5d10465cb
+data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
+data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
+data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
+data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
+data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
+data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
+data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
+data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
+data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
+data8 0xce6e0be0cd551a61, 0x823880f78e70b805
+data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
+data8 0x826097a62a8e5200, 0xa443df0e53df577a
+data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
+data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
+data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
+data8 0xcf763c47ee869f00, 0x82da06a527b18937
+data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
+data8 0x8302e60b635ab394, 0xa5105d46152c938a
+data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
+data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
+data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
+data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
+data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
+data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
+data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
+data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
+data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
+data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
+data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
+data8 0x844510461ff14209, 0xa6a6444aa0243c0b
+data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
+data8 0xa6dc094d10f25792, 0xd23ad555f773f059
+data8 0x84947e18234f3294, 0xa70a574cc02bba69
+data8 0xd2752c7039a5bf73, 0x84bf92755825045a
+data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
+data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
+data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
+data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
+data8 0x85359d5d91768427, 0xa7d5579ae5164b85
+data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
+data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
+data8 0x858104f0c415f79a, 0xa8345895e5250a5a
+data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
+data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
+data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
+data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
+data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
+data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
+data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
+data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
+data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
+data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
+data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
+data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
+data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
+data8 0xa9ea8686f556f645, 0xd614b539c6194104
+data8 0x870453c845acf90f, 0xaa1c52d17906bb19
+data8 0xd6537310e224283f, 0x872c089a1e90342c
+data8 0xaa4e59b046dab887, 0xd6927ab62244c917
+data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
+data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
+data8 0xaab319102f3f9b33, 0xd71169cea98fdded
+data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
+data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
+data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
+data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
+data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
+data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
+data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
+data8 0xd83e38838648d815, 0x885bc559e5e1c081
+data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
+data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
+data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
+data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
+data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
+data8 0xd92432bd5a173685, 0x88f4356166bd590e
+data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
+data8 0x89173a0acf5ce026, 0xacb93703ff51571e
+data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
+data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
+data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
+data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
+data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
+data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
+data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
+data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
+data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
+data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
+data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
+data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
+data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
+data8 0xae5794122b638df9, 0xdba843ded7151ea1
+data8 0x8a849aba14274764, 0xae858fda8137ae0a
+data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
+data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
+data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
+data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
+data8 0xaf10a899d3235fe7, 0xdc917398f2797814
+data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
+data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
+data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
+data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
+data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
+data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
+data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
+data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
+data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
+data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
+data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
+data8 0xb078f3ab1d701c65, 0xde576480262399bc
+data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
+data8 0xde943789645933c8, 0x8c5dc4c4f7706032
+data8 0xb0d9b624d62ec856, 0xded14d58139a28af
+data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
+data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
+data8 0xb131821882f5540a, 0xdf3feb44d723a713
+data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
+data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
+data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
+data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
+data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
+data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
+data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
+data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
+data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
+data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
+data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
+data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
+data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
+data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
+data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
+data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
+data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
+data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
+data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
+data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
+data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
+data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
+data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
+data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
+data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
+data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
+data8 0xb43da8e9d163e1af, 0xe316d93615862714
+data8 0x8f385c95d696b817, 0xb47233773b84d425
+data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
+data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
+data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
+data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
+data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
+data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
+data8 0xe42eeca17c62886c, 0x8fe117499e356095
+data8 0xb546c9616087ab9c, 0xe464e32943446305
+data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
+data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
+data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
+data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
+data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
+data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
+data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
+data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
+data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
+data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
+data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
+data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
+data8 0x9110021e7b516f0a, 0xb6c47044075b4142
+data8 0xe645bd1544c7ea51, 0x912a708a39be9075
+data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
+data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
+data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
+data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
+data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
+data8 0xe70a9136a7403039, 0x91afbc299ed0295d
+data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
+data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
+data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
+data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
+data8 0x9212b5fcac537c19, 0xb80a6226904045e2
+data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
+data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
+data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
+data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
+data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
+data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
+data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
+data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
+data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
+data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
+data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
+data8 0x931379a403be5c16, 0xb94de2d841a184c2
+data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
+data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
+data8 0x9354c71412c69486, 0xb9a0297f172665e3
+data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
+data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
+data8 0x93968919f6e7975d, 0xb9f3030951267208
+data8 0xea480963fd394197, 0x93bc516fdd4680c9
+data8 0xba229d6a618e7c59, 0xea84034425f27484
+data8 0x93d8c123d9be59b2, 0xba467144459f9855
+data8 0xeab12713138dd1cc, 0x93f546c955e60076
+data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
+data8 0x941b70a65879079f, 0xba9a76056b67ee7a
+data8 0xeb1b0268343b121b, 0x943829f337410591
+data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
+data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
+data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
+data8 0xbb1385a23be24e57, 0xebb389645f222f62
+data8 0x94988aeb23470f86, 0xbb3814975e17c680
+data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
+data8 0xbb5cc031009bf467, 0xec0fcc9321024509
+data8 0x94d2d7a9170d8b42, 0xbb81889680024764
+data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
+data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
+data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
+data8 0xecaad5278824e453, 0x9534cefa625fcb3a
+data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
+data8 0x955265405c491a25, 0xbc223d88cfc88eee
+data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
+data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
+data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
+data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
+data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
+data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
+data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
+data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
+data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
+data8 0xee357ead791fc670, 0x962e350575b409c5
+data8 0xbd372f8598620f19, 0xee658cb3c134a463
+data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
+data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
+data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
+data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
+data8 0xeef6a0da64a014ac, 0x96a8426705198795
+data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
+data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
+data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
+data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
+data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
+data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
+data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
+data8 0x97430782be323831, 0xbe93f5b41d047cf7
+data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
+data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
+data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
+data8 0xf0805c944d827454, 0x97a117ffd0f48e46
+data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
+data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
+data8 0xf0e46442e76f6569, 0x97e0505a8637a036
+data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
+data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
+data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
+data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
+data8 0x98354085054fd204, 0xbfc52428bec6e72f
+data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
+data8 0xbfed838fddab024b, 0xf1d0593311db1757
+data8 0x987571fffb7f94f6, 0xc016050c0420981a
+data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
+data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
+data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
+data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
+data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
+data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
+data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
+data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
+data8 0x9922b8218160967a, 0xc0f054ca33eb3437
+data8 0xf31670135ab9cc0f, 0x99438d686f75779d
+data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
+data8 0x99647eea131fa20b, 0xc1433453de2033ff
+data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
+data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
+data8 0x999ba5f14f8add02, 0xc188b130431d80e6
+data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
+data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
+data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
+data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
+data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
+data8 0x9a16154eb445c873, 0xc222f35a87b415ba
+data8 0xf498c1076015faf8, 0x9a2c822ec198d667
+data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
+data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
+data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
+data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
+data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
+data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
+data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
+data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
+data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
+data8 0xc323938449a2587e, 0xf5dc1501f324a812
+data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
+data8 0xf6006bee86b5589e, 0x9b1b19033be35730
+data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
+data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
+data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
+data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
+data8 0x9b77854e6c661200, 0xc3e0410243b97383
+data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
+data8 0xc3fd890709833d37, 0xf6eeb177472cedae
+data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
+data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
+data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
+data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
+data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
+data8 0xc490f9a94695ba14, 0xf7a874b97927af44
+data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
+data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
+data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
+data8 0x9c568656c0423def, 0xc4f938aec206291a
+data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
+data8 0xc52629e899dfd622, 0xf8646bf0defb759e
+data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
+data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
+data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
+data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
+data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
+data8 0xc5adf561b91e110a, 0xf90f832c2700c160
+data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
+data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
+data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
+data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
+data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
+data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
+data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
+data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
+data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
+data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
+data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
+data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
+data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
+data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
+data8 0xc70fc0117c641630, 0xfacd431644ce0e40
+data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
+data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
+data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
+data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
+data8 0xfb576c5762024805, 0x9e6ed27594550d2e
+data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
+data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
+data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
+data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
+data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
+data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
+data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
+data8 0x9ef976db07288d04, 0xc84b978847a06b87
+data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
+data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
+data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
+data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
+data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
+data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
+data8 0xfd118595143ee273, 0x9f860593d42fd7f3
+data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
+data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
+data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
+data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
+data8 0x9fd383731ca51db9, 0xc95e5112e721582a
+data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
+data8 0xc97f06bb49787677, 0xfdde8a67d2613531
+data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
+data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
+data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
+data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
+data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
+data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
+data8 0xa07d73ba65e680af, 0xca346d07b045a876
+data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
+data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
+data8 0xa0b24fe89e02602f, 0xca77068257be9bab
+data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
+data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
+data8 0xa0e77200215909e6, 0xcab9f8122c99a101
+data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
+data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
+data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
+data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
+data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
+ASM_SIZE_DIRECTIVE(T_table)
+
+
+
+
+
+
+.align 32
+.global cbrt#
+
+.section .text
+.proc cbrt# // double cbrt(double): argument arrives in f8, result is returned in f8
+.align 32
+cbrt:
+
+
+{ .mfi
+ // get significand
+ getf.sig r23=f8
+ // p12=1 if the input is NOT normal/denormal (those inputs take the early return below)
+ (p0) fclass.nm.unc p12,p0 = f8, 0x1b
+ // r2 = address of the linkage-table slot for poly_coeffs (C_1,...,C_5 followed by T_table)
+ addl r2 = @ltoff(poly_coeffs), gp
+}
+{.mfi
+ // get exponent
+ getf.exp r24=f8
+ // normalize a
+ fma.s1 f14=f8,f1,f0
+ // r29=bias-((2^{12}-1)/3) -63=0xffff-0x555-0x3f=0xfa6b
+ mov r29=0xfa6b;;
+}
+{.mlx
+ mov r25=0x20000 // r25 = mask for the sign bit in getf.exp output
+ // r28=2^63 (a significand below this means the input is denormal)
+ movl r28=0x8000000000000000;;
+}
+{.mfb
+ // load start address for C_1,...,C_5 followed by T_table
+ ld8 r3=[r2]
+ (p12) fma.d.s0 f8=f8,f1,f0 // special input: result = x*1+0
+ (p12) br.ret.spnt b0
+}
+{.mfi
+ nop.m 0
+ // y=frcpa(a)
+ frcpa.s0 f8,p6=f1,f8
+ // p7=1 if denormal input
+ cmp.gtu p7,p0=r28,r23;;
+}
+{.mmi
+ // get exponent of the normalized input
+ (p7) getf.exp r24=f14
+ // get normalized significand
+ (p7) getf.sig r23=f14
+ // r28=bias-(2^{12}-1)
+ mov r28=0xf000;;
+}
+{.mii
+ // get r26=sign
+ and r26=r24,r25
+ // eliminate leading 1 from r23=1st table index
+ shl r23=r23,1
+ // eliminate sign from exponent (r25)
+ andcm r25=r24,r25;;
+}
+{.mib
+ add r2=32,r3 // r2 = address of C_4 (poly_coeffs+32)
+ // r23=1st table index (y_index,8 bits)
+ shr.u r23=r23,56
+ nop.b 0
+}
+{.mib
+ // load C_1
+ ldfe f7=[r3],16
+ // subtract bias from r25=exponent
+ sub r25=r25,r28
+ nop.b 0;;
+}
+{.mib
+ // load C_2, C_3
+ ldfpd f9,f10=[r3]
+ // 1: exponent*=5; // (2^{16}-1)/3=0x5555
+ shladd r24=r25,2,r25
+ nop.b 0
+}
+{.mib
+ // load C_4, C_5 (post-increment leaves r2 at poly_coeffs+48)
+ ldfpd f11,f12=[r2],16
+ // r23=3*y_index
+ shladd r23=r23,1,r23
+ nop.b 0;;
+}
+
+{.mfi
+ // r30=(5*expon)*16+5*expon=(0x55)*expon
+ shladd r30=r24,4,r24
+ // r=1-a*y
+ (p6) fnma.s1 f6=f8,f14,f1
+ // adjust T_table pointer by 1st index
+ shladd r2=r23,3,r2;;
+}
+
+{.mii
+ nop.m 0
+ // r24=(0x5500)*expon
+ shl r24=r30,8;;
+ // r24=(0x5555)*expon
+ add r24=r24,r30;;
+}
+{.mii
+ // r24=(0x5556)*expon // 0x5556=(2^{16}+2)/3
+ add r24=r24,r25
+ nop.i 0;;
+ // r24=floor(expon/3)
+ shr r24=r24,16;;
+}
+{.mfi
+ // r28=3*floor(expon/3)
+ shladd r28=r24,1,r24
+ // f13=r^2=r*r
+ (p6) fma.s1 f13=f6,f6,f0
+ // bias exponent
+ add r24=r29,r24;;
+}
+{.mfi
+ // get remainder of exponent/3 : r25-r28
+ sub r25=r25,r28
+ // c2+c3*r
+ (p6) fma.s1 f9=f10,f6,f9
+ // add sign to exponent
+ or r24=r24,r26
+}
+{.mfi
+ nop.m 0
+ // c4+c5*r
+ (p6) fma.s1 f11=f12,f6,f11
+ nop.i 0;;
+}
+{.mmi
+ // f14=sign*2^{exponent/3}
+ (p6) setf.exp f14=r24
+ // adjust T_table pointer by 2nd index (the remainder of exponent/3)
+ shladd r2=r25,3,r2
+ nop.i 0;;
+}
+{.mmi
+ // load T
+ (p6) ldf8 f8=[r2]
+ nop.m 0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // (c2+c3*r)+r^2*(c4+c5*r)
+ (p6) fma.s1 f9=f11,f13,f9
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // c1*r
+ (p6) fma.s1 f7=f7,f6,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P=c1*r+r^2*[(c2+c3*r)+r^2*(c4+c5*r)]
+ (p6) fma.s1 f9=f9,f13,f7
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // T'=T*(2^exp)
+ (p6) fma.s1 f8=f8,f14,f0
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ // result = T'-T'*P
+ (p6) fnma.d.s0 f8=f8,f9,f8
+ br.ret.sptk b0;;
+}
+.endp cbrt
+ASM_SIZE_DIRECTIVE(cbrt)
diff --git a/sysdeps/ia64/fpu/s_cbrtf.S b/sysdeps/ia64/fpu/s_cbrtf.S
new file mode 100644
index 0000000..620bbb5
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_cbrtf.S
@@ -0,0 +1,655 @@
+.file "cbrtf.asm"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang
+// of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 5/18/00: New version (modified algorithm)
+//
+// API
+//==============================================================
+// float cbrtf(float)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// cbrt(a) = cbrt(a y) / cbrt(y)
+// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+//
+// where y = frcpa(a).
+//
+// * cbrt(1 - (1 - a y)) is approximated by a degree-2 polynomial
+//
+// 1 - (1/3)*r - (1/9)*r^2
+//
+// in r = 1 - a y.
+//
+// * The values 1/cbrt(y) are stored in a table of constants T0
+// to 64 bits of accuracy
+//
+// The table values are stored for three exponent values and are
+// then multiplied by 2^(e/3) where e is the exponent of the input number.
+// This computation is carried out in parallel with the polynomial
+// evaluation:
+//
+// T = 2^(e/3) * T0
+
+
+
+
+
+//===============
+// input = x
+// C = frcpa(x)
+// r = 1 - C * x
+//
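+// Special values
+//==============================================================
+// Inputs that are not normal/denormal numbers (zeros, infinities, NaNs)
+// skip the table algorithm and are returned as x*1+0 rounded to single.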
+
+// Registers used
+//==============================================================
+// f6-f15
+// r2, r23-r26, r28-r30
+// p6,p7,p8,p12
+
+#include "libm_support.h"
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+// C_1, C_2 of the cbrtf polynomial, 16 bytes each (read with ldfe); T_table follows directly.
+poly_coeffs:
+ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
+data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // C_1 = 1/3 (significand, then sign/exponent word)
+data8 0xe38e38e38e38e38e, 0x00003ffb // C_2 = 1/9 (significand, then sign/exponent word)
+ASM_SIZE_DIRECTIVE(poly_coeffs)
+
+
+T_table:
+ASM_TYPE_DIRECTIVE(T_table,@object)
+
+data8 0x80155c748c374836, 0xa160019ed37fb4ae
+data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
+data8 0xa1960b5966da4608, 0xcb95f333968ad59b
+data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
+data8 0xcbda64292d3ffd97, 0x8096b586974669b1
+data8 0xa202f97995b69c0d, 0xcc1f3184af961596
+data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
+data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
+data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
+data8 0x81149add67c2d208, 0xa2a197e5d10465cb
+data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
+data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
+data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
+data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
+data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
+data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
+data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
+data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
+data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
+data8 0xce6e0be0cd551a61, 0x823880f78e70b805
+data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
+data8 0x826097a62a8e5200, 0xa443df0e53df577a
+data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
+data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
+data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
+data8 0xcf763c47ee869f00, 0x82da06a527b18937
+data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
+data8 0x8302e60b635ab394, 0xa5105d46152c938a
+data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
+data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
+data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
+data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
+data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
+data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
+data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
+data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
+data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
+data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
+data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
+data8 0x844510461ff14209, 0xa6a6444aa0243c0b
+data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
+data8 0xa6dc094d10f25792, 0xd23ad555f773f059
+data8 0x84947e18234f3294, 0xa70a574cc02bba69
+data8 0xd2752c7039a5bf73, 0x84bf92755825045a
+data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
+data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
+data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
+data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
+data8 0x85359d5d91768427, 0xa7d5579ae5164b85
+data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
+data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
+data8 0x858104f0c415f79a, 0xa8345895e5250a5a
+data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
+data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
+data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
+data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
+data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
+data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
+data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
+data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
+data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
+data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
+data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
+data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
+data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
+data8 0xa9ea8686f556f645, 0xd614b539c6194104
+data8 0x870453c845acf90f, 0xaa1c52d17906bb19
+data8 0xd6537310e224283f, 0x872c089a1e90342c
+data8 0xaa4e59b046dab887, 0xd6927ab62244c917
+data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
+data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
+data8 0xaab319102f3f9b33, 0xd71169cea98fdded
+data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
+data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
+data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
+data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
+data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
+data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
+data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
+data8 0xd83e38838648d815, 0x885bc559e5e1c081
+data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
+data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
+data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
+data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
+data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
+data8 0xd92432bd5a173685, 0x88f4356166bd590e
+data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
+data8 0x89173a0acf5ce026, 0xacb93703ff51571e
+data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
+data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
+data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
+data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
+data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
+data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
+data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
+data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
+data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
+data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
+data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
+data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
+data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
+data8 0xae5794122b638df9, 0xdba843ded7151ea1
+data8 0x8a849aba14274764, 0xae858fda8137ae0a
+data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
+data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
+data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
+data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
+data8 0xaf10a899d3235fe7, 0xdc917398f2797814
+data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
+data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
+data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
+data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
+data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
+data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
+data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
+data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
+data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
+data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
+data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
+data8 0xb078f3ab1d701c65, 0xde576480262399bc
+data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
+data8 0xde943789645933c8, 0x8c5dc4c4f7706032
+data8 0xb0d9b624d62ec856, 0xded14d58139a28af
+data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
+data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
+data8 0xb131821882f5540a, 0xdf3feb44d723a713
+data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
+data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
+data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
+data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
+data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
+data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
+data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
+data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
+data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
+data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
+data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
+data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
+data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
+data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
+data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
+data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
+data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
+data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
+data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
+data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
+data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
+data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
+data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
+data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
+data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
+data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
+data8 0xb43da8e9d163e1af, 0xe316d93615862714
+data8 0x8f385c95d696b817, 0xb47233773b84d425
+data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
+data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
+data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
+data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
+data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
+data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
+data8 0xe42eeca17c62886c, 0x8fe117499e356095
+data8 0xb546c9616087ab9c, 0xe464e32943446305
+data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
+data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
+data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
+data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
+data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
+data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
+data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
+data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
+data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
+data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
+data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
+data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
+data8 0x9110021e7b516f0a, 0xb6c47044075b4142
+data8 0xe645bd1544c7ea51, 0x912a708a39be9075
+data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
+data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
+data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
+data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
+data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
+data8 0xe70a9136a7403039, 0x91afbc299ed0295d
+data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
+data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
+data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
+data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
+data8 0x9212b5fcac537c19, 0xb80a6226904045e2
+data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
+data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
+data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
+data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
+data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
+data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
+data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
+data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
+data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
+data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
+data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
+data8 0x931379a403be5c16, 0xb94de2d841a184c2
+data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
+data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
+data8 0x9354c71412c69486, 0xb9a0297f172665e3
+data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
+data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
+data8 0x93968919f6e7975d, 0xb9f3030951267208
+data8 0xea480963fd394197, 0x93bc516fdd4680c9
+data8 0xba229d6a618e7c59, 0xea84034425f27484
+data8 0x93d8c123d9be59b2, 0xba467144459f9855
+data8 0xeab12713138dd1cc, 0x93f546c955e60076
+data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
+data8 0x941b70a65879079f, 0xba9a76056b67ee7a
+data8 0xeb1b0268343b121b, 0x943829f337410591
+data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
+data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
+data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
+data8 0xbb1385a23be24e57, 0xebb389645f222f62
+data8 0x94988aeb23470f86, 0xbb3814975e17c680
+data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
+data8 0xbb5cc031009bf467, 0xec0fcc9321024509
+data8 0x94d2d7a9170d8b42, 0xbb81889680024764
+data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
+data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
+data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
+data8 0xecaad5278824e453, 0x9534cefa625fcb3a
+data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
+data8 0x955265405c491a25, 0xbc223d88cfc88eee
+data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
+data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
+data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
+data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
+data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
+data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
+data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
+data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
+data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
+data8 0xee357ead791fc670, 0x962e350575b409c5
+data8 0xbd372f8598620f19, 0xee658cb3c134a463
+data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
+data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
+data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
+data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
+data8 0xeef6a0da64a014ac, 0x96a8426705198795
+data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
+data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
+data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
+data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
+data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
+data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
+data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
+data8 0x97430782be323831, 0xbe93f5b41d047cf7
+data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
+data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
+data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
+data8 0xf0805c944d827454, 0x97a117ffd0f48e46
+data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
+data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
+data8 0xf0e46442e76f6569, 0x97e0505a8637a036
+data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
+data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
+data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
+data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
+data8 0x98354085054fd204, 0xbfc52428bec6e72f
+data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
+data8 0xbfed838fddab024b, 0xf1d0593311db1757
+data8 0x987571fffb7f94f6, 0xc016050c0420981a
+data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
+data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
+data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
+data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
+data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
+data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
+data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
+data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
+data8 0x9922b8218160967a, 0xc0f054ca33eb3437
+data8 0xf31670135ab9cc0f, 0x99438d686f75779d
+data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
+data8 0x99647eea131fa20b, 0xc1433453de2033ff
+data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
+data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
+data8 0x999ba5f14f8add02, 0xc188b130431d80e6
+data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
+data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
+data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
+data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
+data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
+data8 0x9a16154eb445c873, 0xc222f35a87b415ba
+data8 0xf498c1076015faf8, 0x9a2c822ec198d667
+data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
+data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
+data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
+data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
+data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
+data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
+data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
+data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
+data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
+data8 0xc323938449a2587e, 0xf5dc1501f324a812
+data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
+data8 0xf6006bee86b5589e, 0x9b1b19033be35730
+data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
+data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
+data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
+data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
+data8 0x9b77854e6c661200, 0xc3e0410243b97383
+data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
+data8 0xc3fd890709833d37, 0xf6eeb177472cedae
+data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
+data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
+data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
+data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
+data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
+data8 0xc490f9a94695ba14, 0xf7a874b97927af44
+data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
+data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
+data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
+data8 0x9c568656c0423def, 0xc4f938aec206291a
+data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
+data8 0xc52629e899dfd622, 0xf8646bf0defb759e
+data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
+data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
+data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
+data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
+data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
+data8 0xc5adf561b91e110a, 0xf90f832c2700c160
+data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
+data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
+data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
+data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
+data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
+data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
+data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
+data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
+data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
+data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
+data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
+data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
+data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
+data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
+data8 0xc70fc0117c641630, 0xfacd431644ce0e40
+data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
+data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
+data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
+data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
+data8 0xfb576c5762024805, 0x9e6ed27594550d2e
+data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
+data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
+data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
+data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
+data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
+data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
+data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
+data8 0x9ef976db07288d04, 0xc84b978847a06b87
+data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
+data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
+data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
+data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
+data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
+data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
+data8 0xfd118595143ee273, 0x9f860593d42fd7f3
+data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
+data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
+data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
+data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
+data8 0x9fd383731ca51db9, 0xc95e5112e721582a
+data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
+data8 0xc97f06bb49787677, 0xfdde8a67d2613531
+data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
+data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
+data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
+data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
+data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
+data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
+data8 0xa07d73ba65e680af, 0xca346d07b045a876
+data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
+data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
+data8 0xa0b24fe89e02602f, 0xca77068257be9bab
+data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
+data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
+data8 0xa0e77200215909e6, 0xcab9f8122c99a101
+data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
+data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
+data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
+data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
+data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
+ASM_SIZE_DIRECTIVE(T_table)
+
+
+
+
+
+
+.align 32
+.global cbrtf#
+
+.section .text
+.proc cbrtf#
+.align 32
+cbrtf:
+// float cbrtf(float): input a and result both in f8. With y=frcpa(a), r=1-a*y:
+// result = sign*2^floor(expon/3) * T[expon mod 3][y_index] * (1 - C_1*r - C_2*r^2)
+{ .mfi
+ getf.sig r28=f8
+ // will continue only for normal/denormal numbers: p7=member of class 0x1b, p12=not a member
+(p0) fclass.nm.unc p12,p7 = f8, 0x1b
+ // r2 = address of the linkage-table slot that points to C_1,C_2 followed by T_table
+ addl r2 = @ltoff(poly_coeffs), gp
+}
+{.mfi
+ // r29=bias-((2^8-1)/3) -63=0xffff-0x55-0x3f=0xff6b (added to floor(expon/3) below)
+ mov r29=0xff6b // NOTE(review): the -63 presumably offsets the exponent ldf8 gives the table values -- confirm
+ // normalize a: f14 = a*1+0 computed in s1
+ fma.s1 f14=f8,f1,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ (p7) cmp.eq p12,p0=r28,r0 // a zero significand (r28==0) also selects the early return
+ nop.b 0;;
+}
+{.mfb
+ // load start address for C_1,C_2 followed by T_table
+ ld8 r2=[r2]
+ (p12) fma.s.s0 f8=f8,f1,f0 // early-return value: a*1+0 rounded to single
+ (p12) br.ret.spnt b0;;
+}
+{.mmf
+ // load C_1 (1/3); r2 advances by 16 to C_2
+ ldfe f7=[r2],16
+ nop.m 0
+ // y=frcpa(a): f8 now holds y; p6 predicates the rest of the computation
+ frcpa.s0 f8,p6=f1,f8;;
+}
+{.mmi
+ // load C_2 (1/9); r2 advances by 16 and now points at T_table
+ ldfe f9=[r2],16
+ // r28=bias-(2^8-1) = 0xffff-0xff
+ mov r28=0xff00
+ nop.i 0;;
+}
+{.mmi
+ // get normalized significand
+ getf.sig r23=f14
+ // get exponent (r24 = sign and biased exponent of the normalized input)
+ getf.exp r24=f14
+ mov r25=0x20000;; // r25 = mask used below to separate the sign from the exponent
+}
+{.mii
+ // get r26=sign
+ and r26=r24,r25
+ // eliminate leading 1 from r23=1st table index
+ shl r23=r23,1
+ // eliminate sign from exponent (r25)
+ andcm r25=r24,r25;;
+}
+{.mfi
+ // subtract bias from r25=exponent: expon = exponent-bias+(2^8-1)
+ sub r25=r25,r28
+ // r=1-a*y
+ (p6) fnma.s1 f6=f8,f14,f1
+ // r23=1st table index (y_index, 8 bits): top 8 significand bits after the leading 1
+ shr.u r23=r23,56;;
+}
+{.mii
+ // divide expon by 3 as (0x5556*expon)>>16; step 1: r24=5*expon // (2^{16}-1)/3=0x5555
+ shladd r24=r25,2,r25
+ // r23=3*y_index (each table entry holds three values)
+ shladd r23=r23,1,r23;;
+ // r30=(5*expon)*16+5*expon=(0x55)*expon
+ shladd r30=r24,4,r24;;
+}
+{.mmi
+ // adjust T_table pointer by 1st index: r2 += 8*(3*y_index)
+ shladd r2=r23,3,r2;;
+ // f10=T[0][y], r2 advances by 8
+ (p6) ldf8 f10=[r2],8
+ // r24=(0x5500)*expon
+ shl r24=r30,8;;
+}
+{.mfi
+ // f11=T[1][y], r2 advances by 8
+ (p6) ldf8 f11=[r2],8
+ // P_1=C_1+C_2*r
+ (p6) fma.s1 f7=f9,f6,f7
+ // r24=(0x5555)*expon
+ add r24=r24,r30;;
+}
+{.mmi
+ // r24=(0x5556)*expon // 0x5556=(2^{16}+2)/3
+ add r24=r24,r25;;
+ // f8=T[2][y] (y itself is no longer needed: r was already computed from it)
+ (p6) ldf8 f8=[r2]
+ // r24=floor(expon/3)
+ shr r24=r24,16;;
+}
+{.mmi
+ nop.m 0
+ // r28=3*floor(expon/3)
+ shladd r28=r24,1,r24
+ // bias exponent: r24=r29+floor(expon/3)
+ add r24=r29,r24;;
+}
+{.mmi
+ // get remainder of exponent/3: r25=expon-3*floor(expon/3)
+ sub r25=r25,r28
+ // add sign to exponent
+ or r24=r24,r26
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P_2=-r*P_1 = -(C_1*r+C_2*r^2)
+ (p6) fnma.s1 f6=f7,f6,f0
+ // remainder=0 ? p7=yes, p8=no
+ (p6) cmp.eq.unc p7,p8=r0,r25;;
+}
+{.mfi
+ // f14=sign*2^{exponent/3}
+ (p6) setf.exp f14=r24
+ nop.f 0
+ // remainder = 1 ? p8=yes, p12=no (f8 then already holds T[2][y])
+ (p8) cmp.eq.unc p8,p12=1,r25;;
+}
+.pred.rel "mutex",p7,p8
+{.mfi
+ nop.m 0
+ // remainder=0 -> use T=f10: f8=T+T*P_2
+ (p7) fma.s1 f8=f10,f6,f10
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // remainder =1 -> use f11: f8=T+T*P_2
+ (p8) fma.s1 f8=f11,f6,f11
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // remainder=2: result=T+T*P_2 with T=f8. NOTE(review): uses .s.s0 where the other two cases use .s1 -- confirm intended
+ (p12) fma.s.s0 f8=f8,f6,f8
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ // T*=sgn*2^{expon/3}: final result in f8, rounded to single
+ (p6) fma.s.s0 f8=f8,f14,f0
+ br.ret.sptk b0;;
+}
+.endp cbrtf
+ASM_SIZE_DIRECTIVE(cbrtf)
diff --git a/sysdeps/ia64/fpu/s_cbrtl.S b/sysdeps/ia64/fpu/s_cbrtl.S
new file mode 100644
index 0000000..c44ecf7
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_cbrtl.S
@@ -0,0 +1,889 @@
+.file "cbrtl.asm"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang
+// of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 4/28/00: Initial version
+//
+// API
+//==============================================================
+// long double cbrtl(long double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// cbrt(a) = cbrt(a y) / cbrt(y)
+// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+//
+// where y = frcpa(a).
+//
+// * cbrt(1 - (1 - a y)) is approximated by a degree-6 polynomial
+//
+// 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
+//
+// in r = 1 - a y.
+//
+// * The values 1/cbrt(y) are stored as two tables of constants T_hi
+// (double-extended precision) and D (single precision) as follows:
+//
+// T_hi (1 + D) = 1/cbrt(y) to about 80 bits of accuracy
+//
+// The tables are stored only for the three possible remainders of e/3,
+// where e is the exponent of the input number; the selected entry is then
+// scaled by sign * 2^floor(e/3). This computation is carried out in
+// parallel with the polynomial evaluation:
+//
+// T = sign * 2^floor(e/3) * T_hi
+
+
+
+
+
+//===============
+// input = x
+// C = frcpa(x)
+// r = 1 - C * x
+//
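+// Special values
+//==============================================================
+// Inputs that are not normal or denormal numbers, and inputs whose
+// significand is zero, are returned as x * 1 without further computation.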
+
+// Registers used
+//==============================================================
+// f6-f15
+// r2-r3, r23-r30
+// p6,p7,p12
+
+#include "libm_support.h"
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+// The constant tables below go to .rodata when _LIBC is defined, otherwise to .data.
+.align 16
+// cbrtl walks these 64 bytes with r2 post-increments and then indexes T_table from r2, so T_table must follow directly.
+poly_coeffs:
+ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
+data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1 (about 1/3; 16 bytes, read with ldfe)
+data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2 (about 1/9; 16 bytes, read with ldfe)
+data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4 (pair read with ldfpd)
+data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6 (pair read with ldfpd)
+ASM_SIZE_DIRECTIVE(poly_coeffs)
+
+T_table:
+ASM_TYPE_DIRECTIVE(T_table,@object)
+// T_hi: 768 entries = 3 (remainder of exponent/3) x 256 (8-bit significand index); each 8-byte entry is read with ldf8.
+data8 0x80155c748c374836, 0x8040404b0879f7f9
+data8 0x806b5dce4b405c10, 0x8096b586974669b1
+data8 0x80bcd273d952a028, 0x80e898c52813f2f3
+data8 0x81149add67c2d208, 0x813b4e2c856b6e9a
+data8 0x8167c1dde03de7aa, 0x818ed973b811135e
+data8 0x81bbc0c33e13ec98, 0x81e33e69fbe7504a
+data8 0x820aec524e3c23e9, 0x823880f78e70b805
+data8 0x826097a62a8e5200, 0x8288dfe00e9b5eaf
+data8 0x82b15a10c5371624, 0x82da06a527b18937
+data8 0x8302e60b635ab394, 0x832bf8b2feec2f0e
+data8 0x83553f0ce00e276b, 0x837eb98b50f8322a
+data8 0x83a270f44c84f699, 0x83cc4d7cfcfac5ca
+data8 0x83f65f78a8872b4c, 0x8420a75f2f7b53c8
+data8 0x844510461ff14209, 0x846fbd91b930bed2
+data8 0x84947e18234f3294, 0x84bf92755825045a
+data8 0x84e4ac0ee112ba51, 0x8509ef44b86f20be
+data8 0x85359d5d91768427, 0x855b3bd5b7384357
+data8 0x858104f0c415f79a, 0x85a6f90390d29864
+data8 0x85d3772fcd56a1dd, 0x85f9c982fcc002f3
+data8 0x862047e0e7ea554b, 0x8646f2a26f7f5852
+data8 0x866dca21754096b5, 0x8694ceb8dfd17a37
+data8 0x86bc00c49e9307e8, 0x86dccd74fce79610
+data8 0x870453c845acf90f, 0x872c089a1e90342c
+data8 0x8753ec4a92d16c5e, 0x877bff3aca19f6b4
+data8 0x879d88b6fe1c324c, 0x87c5f346dbf98c3a
+data8 0x87e7c653efacef2c, 0x881089d4e73ffefc
+data8 0x88397e6a366f2a8a, 0x885bc559e5e1c081
+data8 0x887e2ee392bb7a93, 0x88a7a8587e404257
+data8 0x88ca5eda67594784, 0x88f4356166bd590e
+data8 0x89173a0acf5ce026, 0x893a62a098b6a57b
+data8 0x895daf637236ae2c, 0x89883b9d1c2fa9c5
+data8 0x89abd8dd374a5d7b, 0x89cf9b1dcd197fa0
+data8 0x89f382a258ea79de, 0x8a178faf06648f29
+data8 0x8a3bc288b3e1d18a, 0x8a601b74f4d1f835
+data8 0x8a849aba14274764, 0x8aa9409f16cdbc9b
+data8 0x8ace0d6bbe2cb316, 0x8af301688ab33558
+data8 0x8b181cdebe6f3206, 0x8b3d60185fafcb7c
+data8 0x8b62cb603bb2fad0, 0x8b80d7d6bc4104de
+data8 0x8ba68bf73ac74f39, 0x8bcc68fb9f9f7335
+data8 0x8bf26f31c534fca2, 0x8c10f86e13a1a1f9
+data8 0x8c3749916cc6abb5, 0x8c5dc4c4f7706032
+data8 0x8c7cac3a8c42e3e0, 0x8ca373f1b7bf2716
+data8 0x8cc29907fb951294, 0x8ce9ae4e9492aac8
+data8 0x8d0911dddbfdad0e, 0x8d3075c4f20f04ee
+data8 0x8d5018a9d4de77d5, 0x8d77cc47dd143515
+data8 0x8d97af6352739cb7, 0x8db7af523167800f
+data8 0x8ddfd80bc68c32ff, 0x8e00197e1e7c88fe
+data8 0x8e207859f77e20e7, 0x8e40f4ce60c9f8e2
+data8 0x8e69ba46cf2fde4d, 0x8e8a7a00bd7ae63e
+data8 0x8eab57ef1cf2f529, 0x8ecc5442cffb1dad
+data8 0x8eed6f2d2a4acbfe, 0x8f0ea8dff24441ff
+data8 0x8f385c95d696b817, 0x8f59dc43edd930f3
+data8 0x8f7b7b5f5ffad1c4, 0x8f9d3a1bea165f38
+data8 0x8fbf18adc34b66da, 0x8fe117499e356095
+data8 0x90033624aa685f8d, 0x9025757495f36b86
+data8 0x903f3a5dcc091203, 0x9061b2fceb2bdbab
+data8 0x90844ca7211032a7, 0x90a7079403e6a15d
+data8 0x90c9e3fbafd63799, 0x90ece216c8a16ee4
+data8 0x9110021e7b516f0a, 0x912a708a39be9075
+data8 0x914dcc7b31146370, 0x91714af8cfe984d5
+data8 0x918c00a6f3795e97, 0x91afbc299ed0295d
+data8 0x91d39add3e958db0, 0x91ee9920a8974d92
+data8 0x9212b5fcac537c19, 0x9236f6b256923fcf
+data8 0x92523ee6f90dcfc3, 0x9276bef031e6eb79
+data8 0x929236ec237a24ad, 0x92b6f70b7efe9dc3
+data8 0x92d29f61eec7dc2b, 0x92f7a05d5b8ba92f
+data8 0x931379a403be5c16, 0x9338bc44de2e3f34
+data8 0x9354c71412c69486, 0x937a4c273907e262
+data8 0x93968919f6e7975d, 0x93bc516fdd4680c9
+data8 0x93d8c123d9be59b2, 0x93f546c955e60076
+data8 0x941b70a65879079f, 0x943829f337410591
+data8 0x9454f995765bc4d2, 0x947b86b57f5842ed
+data8 0x94988aeb23470f86, 0x94b5a5dc9695f42a
+data8 0x94d2d7a9170d8b42, 0x94f9e87dd78bf019
+data8 0x95175019a503d89e, 0x9534cefa625fcb3a
+data8 0x955265405c491a25, 0x9570130c1f9bb857
+data8 0x9597ca4119525184, 0x95b5af6fb5aa4d3c
+data8 0x95d3ac9273aafd7a, 0x95f1c1cafdfd3684
+data8 0x960fef3b430b8d5f, 0x962e350575b409c5
+data8 0x964c934c0dfc1708, 0x966b0a31c9c6bc7d
+data8 0x968999d9ad8d264e, 0x96a8426705198795
+data8 0x96c703fd64445ee5, 0x96e5dec0a7b4268d
+data8 0x9704d2d4f59f79f3, 0x9723e05ebe91b9b0
+data8 0x97430782be323831, 0x97624865fc0df8bf
+data8 0x9781a32dcc640b2a, 0x97a117ffd0f48e46
+data8 0x97c0a701f9d263c9, 0x97e0505a8637a036
+data8 0x97f57a9fb0b08c6e, 0x9815503365914a9d
+data8 0x98354085054fd204, 0x98554bbbf8a77902
+data8 0x987571fffb7f94f6, 0x9895b3791dd03c23
+data8 0x98ab43a5fc65d0c8, 0x98cbb2d196bd713d
+data8 0x98ec3d9ec7b6f21a, 0x990ce436db5e8344
+data8 0x9922b8218160967a, 0x99438d686f75779d
+data8 0x99647eea131fa20b, 0x997a85045a47c6d0
+data8 0x999ba5f14f8add02, 0x99bce38b5465ecae
+data8 0x99d31ca0887f30f9, 0x99f48a669c74c09e
+data8 0x9a16154eb445c873, 0x9a2c822ec198d667
+data8 0x9a4e3e080cd91b78, 0x9a70177afe52322e
+data8 0x9a86b8fa94eebe10, 0x9aa8c42866ae2958
+data8 0x9abf86f9e12fc45e, 0x9ae1c462fc05f49d
+data8 0x9af8a8dc936b84d0, 0x9b1b19033be35730
+data8 0x9b3da7daf04c2892, 0x9b54c2e4c8a9012b
+data8 0x9b77854e6c661200, 0x9b8ec2e678d56d2f
+data8 0x9ba60e6a5ca133b6, 0x9bc919ea66a151a4
+data8 0x9be0887c09ef82bb, 0x9c03c8d5fffc3503
+data8 0x9c1b5ad21a81cbb9, 0x9c3ed09216e9ca02
+data8 0x9c568656c0423def, 0x9c7a320af242ce60
+data8 0x9c920bf7a8c01dc2, 0x9ca9f475d98b159c
+data8 0x9ccdeca60e80b5f8, 0x9ce5f9d4653d4902
+data8 0x9cfe15cb38bfdd8e, 0x9d225b983f6c1f96
+data8 0x9d3a9cca32261ed7, 0x9d52ecfccebe1768
+data8 0x9d77818d95b82f86, 0x9d8ff7893fa4706c
+data8 0x9da87cbef36f2a5e, 0x9dcd6140b4a35aeb
+data8 0x9de60cd06dc6e2d4, 0x9dfec7d4cc43b76f
+data8 0x9e17925ec9fccc4a, 0x9e3cdf6db57dc075
+data8 0x9e55d110b63637a8, 0x9e6ed27594550d2e
+data8 0x9e87e3adc385d393, 0x9ead9b54b37a1055
+data8 0x9ec6d46a3d7de215, 0x9ee01d9108be3154
+data8 0x9ef976db07288d04, 0x9f12e05a4759ec25
+data8 0x9f2c5a20f4da6668, 0x9f52af78ed1733ca
+data8 0x9f6c52426a39d003, 0x9f860593d42fd7f3
+data8 0x9f9fc97fdb96bd51, 0x9fb99e194f4a7037
+data8 0x9fd383731ca51db9, 0x9fed79a04fbf9423
+data8 0xa00780b413b24ee8, 0xa02eab2c4474b0cd
+data8 0xa048dcd51ccfd142, 0xa0631fa894b11b8d
+data8 0xa07d73ba65e680af, 0xa097d91e6aaf71b0
+data8 0xa0b24fe89e02602f, 0xa0ccd82d1bd2f68b
+data8 0xa0e77200215909e6, 0xa1021d760d584855
+data8 0xa11cdaa36068a57d, 0xa137a99cbd3f880b
+data8 0xa160019ed37fb4ae, 0xa1960b5966da4608
+data8 0xa1cc5dbe6dc2aab4, 0xa202f97995b69c0d
+data8 0xa232fe6eb0c0577d, 0xa26a2582012f6e17
+data8 0xa2a197e5d10465cb, 0xa2d25a532efefbc8
+data8 0xa30a5bd6e49e4ab8, 0xa33b9c9b59879e24
+data8 0xa3742fca6a3c1f21, 0xa3a5f1273887bf22
+data8 0xa3d7ef508ff11574, 0xa4115ce30548bc15
+data8 0xa443df0e53df577a, 0xa4769fa5913c0ec3
+data8 0xa4a99f303bc7def5, 0xa4dcde37779adf4b
+data8 0xa5105d46152c938a, 0xa5441ce89825cb8d
+data8 0xa5781dad3e54d899, 0xa5ac602406c4e68c
+data8 0xa5d9601d95c2c0bc, 0xa60e1e1a2de14745
+data8 0xa6431f6e3fbd9658, 0xa67864b0d432fda4
+data8 0xa6a6444aa0243c0b, 0xa6dc094d10f25792
+data8 0xa70a574cc02bba69, 0xa7409e2af9549084
+data8 0xa76f5c64ca2cf13b, 0xa79e4f0babab5dc0
+data8 0xa7d5579ae5164b85, 0xa804bd3c6fe61cc8
+data8 0xa8345895e5250a5a, 0xa8642a122b44ef0b
+data8 0xa89c38ca18f6108b, 0xa8cc81063b6e87ca
+data8 0xa8fd00bfa409285e, 0xa92db8664d5516da
+data8 0xa95ea86b75cc2c20, 0xa98fd141a4992deb
+data8 0xa9c1335cae7446ba, 0xa9ea8686f556f645
+data8 0xaa1c52d17906bb19, 0xaa4e59b046dab887
+data8 0xaa809b9c60d1890b, 0xaab319102f3f9b33
+data8 0xaadd5a18c1e21274, 0xab1045f2ac31bdf5
+data8 0xab3ae3ab2df7231e, 0xab6e3f945d1e96fc
+data8 0xaba1d953a08fa94e, 0xabcd090db7ef4c3f
+data8 0xabf864602d7c323d, 0xac2ca5886ccf9b57
+data8 0xac5861d4aa441f0f, 0xac8d183fe3a2fbed
+data8 0xacb93703ff51571e, 0xace5830ad0c3f14b
+data8 0xad11fca5d78b3ff2, 0xad4797fddf91a798
+data8 0xad747701e559ebcb, 0xada184a47e9c7613
+data8 0xadcec13ab0dda8ff, 0xadfc2d1a5fd21ba8
+data8 0xae29c89a5053c33a, 0xae5794122b638df9
+data8 0xae858fda8137ae0a, 0xaeb3bc4ccc56d3d1
+data8 0xaee219c374c09920, 0xaf10a899d3235fe7
+data8 0xaf3f692c341fe8b4, 0xaf6e5bd7db9ae6c2
+data8 0xaf9d80fb081cd91b, 0xafc35ce063eb3787
+data8 0xaff2ddcb5f28f03d, 0xb022923b148e05c5
+data8 0xb0527a919adbf58b, 0xb078f3ab1d701c65
+data8 0xb0a93a6870649f31, 0xb0d9b624d62ec856
+data8 0xb100a5f53fb3c8e1, 0xb131821882f5540a
+data8 0xb158bf8e4cb04055, 0xb189fd69d56b238f
+data8 0xb1b189958e8108e4, 0xb1e32a8165b09832
+data8 0xb20b0678fc271eec, 0xb23d0bd3f7592b6e
+data8 0xb26538b2db8420dc, 0xb28d89e339ceca14
+data8 0xb2c022ca12e55a16, 0xb2e8c6852c6b03f1
+data8 0xb3118f4eda9fe40f, 0xb33a7d6268109ebe
+data8 0xb36ddbc5ea70ec55, 0xb3971e9b39264023
+data8 0xb3c0877ecc18e24a, 0xb3ea16ae3a6c905f
+data8 0xb413cc67aa0e4d2d, 0xb43da8e9d163e1af
+data8 0xb47233773b84d425, 0xb49c6825430fe730
+data8 0xb4c6c46bcdb27dcf, 0xb4f1488c0b35d26f
+data8 0xb51bf4c7c51f0168, 0xb546c9616087ab9c
+data8 0xb571c69bdffd9a70, 0xb59cecbae56984c3
+data8 0xb5bd64512bb14bb7, 0xb5e8d2a4bf5ba416
+data8 0xb6146a9a1bc47819, 0xb6402c7749d621c0
+data8 0xb66c1882fb435ea2, 0xb6982f048c999a56
+data8 0xb6c47044075b4142, 0xb6e5bd6bfd02bafd
+data8 0xb7124a2736ff8ef2, 0xb73f026a01e94177
+data8 0xb760a959f1d0a7a7, 0xb78dae7e06868ab0
+data8 0xb7badff8ad9e4e02, 0xb7dce25b8e17ae9f
+data8 0xb80a6226904045e2, 0xb8380f1cafd73c1c
+data8 0xb85a6ea8e321b4d8, 0xb8886b684ae7d2fa
+data8 0xb8ab0726fa00cf5d, 0xb8d954a4d13b7cb1
+data8 0xb8fc2d4f6cd9f04a, 0xb92acc851476b1ab
+data8 0xb94de2d841a184c2, 0xb97cd4c36c92693c
+data8 0xb9a0297f172665e3, 0xb9cf6f21e36c3924
+data8 0xb9f3030951267208, 0xba229d6a618e7c59
+data8 0xba467144459f9855, 0xba6a60c3c48f1a4b
+data8 0xba9a76056b67ee7a, 0xbabea699563ada6e
+data8 0xbae2f350b262cc4b, 0xbb1385a23be24e57
+data8 0xbb3814975e17c680, 0xbb5cc031009bf467
+data8 0xbb81889680024764, 0xbbb2c0d8703ae95d
+data8 0xbbd7cd09ba3c5463, 0xbbfcf68c4977718f
+data8 0xbc223d88cfc88eee, 0xbc47a2284fee4ff8
+data8 0xbc79ac0916ed7b8a, 0xbc9f5670d1a13030
+data8 0xbcc51f068cb95c1d, 0xbceb05f4b30a9bc0
+data8 0xbd110b6604c7d306, 0xbd372f8598620f19
+data8 0xbd5d727edb6b3c7e, 0xbd83d47d937bbc6d
+data8 0xbdaa55addf1ae47d, 0xbdd0f63c36aa73f0
+data8 0xbdf7b6556d550a15, 0xbe1e9626b1ffa96b
+data8 0xbe4595dd903e5371, 0xbe6cb5a7f14bc935
+data8 0xbe93f5b41d047cf7, 0xbebb5630bae4c15f
+data8 0xbee2d74cd30a430c, 0xbf0a7937cf38d981
+data8 0xbf323c217be2bc8c, 0xbf5a203a09342bbb
+data8 0xbf74cad1c14ebfc4, 0xbf9ce6a497a89f78
+data8 0xbfc52428bec6e72f, 0xbfed838fddab024b
+data8 0xc016050c0420981a, 0xc03ea8cfabddc330
+data8 0xc059d3cbd65ddbce, 0xc082b122a3c78c9d
+data8 0xc0abb1499ae736c4, 0xc0d4d474c3aedaaf
+data8 0xc0f054ca33eb3437, 0xc119b2c67e600ed0
+data8 0xc1433453de2033ff, 0xc15ef3e44e10032d
+data8 0xc188b130431d80e6, 0xc1b2929d6067730e
+data8 0xc1ce9268f31cc734, 0xc1f8b0877c1b0c08
+data8 0xc222f35a87b415ba, 0xc23f3467349e5c88
+data8 0xc269b4e40e088c01, 0xc2945aac24daaf6e
+data8 0xc2b0de05e43c1d66, 0xc2dbc275e1229d09
+data8 0xc2f86fca9d80eeff, 0xc323938449a2587e
+data8 0xc3406b40a538ed20, 0xc36bcee8211d15e0
+data8 0xc397593adf2ba366, 0xc3b475b6206155d5
+data8 0xc3e0410243b97383, 0xc3fd890709833d37
+data8 0xc41ae295f7e7fa06, 0xc44709f7bb8a4dd2
+data8 0xc4648fb0e0bec4c1, 0xc490f9a94695ba14
+data8 0xc4aeac0173b7d390, 0xc4db5941007aa853
+data8 0xc4f938aec206291a, 0xc52629e899dfd622
+data8 0xc54436e44043b965, 0xc562563abf9ea07f
+data8 0xc58fa7d1dc42921c, 0xc5adf561b91e110a
+data8 0xc5cc5591bdbd82fa, 0xc5fa08f1ff20593c
+data8 0xc618980a79ce6862, 0xc6373a09e34b50fa
+data8 0xc66550a6e0baaf35, 0xc6842241926342c9
+data8 0xc6a3070b7c93bb9e, 0xc6d18260bb84081b
+data8 0xc6f0977c9416828b, 0xc70fc0117c641630
+data8 0xc72efc34d7e615be, 0xc75dfb441594141e
+data8 0xc77d68aa019bda4c, 0xc79ce9ea478dbc4f
+data8 0xc7bc7f1ae453219d, 0xc7ec0476e15e141a
+data8 0xc80bcbe16f1d540f, 0xc82ba78a5d349735
+data8 0xc84b978847a06b87, 0xc86b9bf1ee817bc6
+data8 0xc88bb4de3667cdf4, 0xc8bc00e7fe9e23a3
+data8 0xc8dc4d7ff2d25232, 0xc8fcaeebcb40eb47
+data8 0xc91d25431426a663, 0xc93db09d7fdb2949
+data8 0xc95e5112e721582a, 0xc97f06bb49787677
+data8 0xc99fd1aecd6e1b06, 0xc9d12a3e27bb1625
+data8 0xc9f22ad82ba3d5f0, 0xca134113105e67b2
+data8 0xca346d07b045a876, 0xca55aecf0e94bb88
+data8 0xca77068257be9bab, 0xca98743ae1c693a8
+data8 0xcab9f8122c99a101, 0xcadb9221e268c3b5
+data8 0xcafd4283d8043dfd, 0xcb1f09520d37c6fb
+data8 0xcb51ddcb9e93095e, 0xcb95f333968ad59b
+data8 0xcbda64292d3ffd97, 0xcc1f3184af961596
+data8 0xcc5bb1ac954d33e2, 0xcca12e9831fc6402
+data8 0xcce70a67b64f24ad, 0xcd24794726477ea5
+data8 0xcd6b096a0b70ee87, 0xcda9177738b15a90
+data8 0xcdf05f2247dffab9, 0xce2f0f347f96f906
+data8 0xce6e0be0cd551a61, 0xceb666b2c347d1de
+data8 0xcef609b0cb874f00, 0xcf35fb5447e5c765
+data8 0xcf763c47ee869f00, 0xcfb6cd3888d71785
+data8 0xcff7aed4fbfbb447, 0xd038e1ce5167e3c6
+data8 0xd07a66d7bfa0ebba, 0xd0bc3ea6b32d1b21
+data8 0xd0f4f0e8f36c1bf8, 0xd1376458e34b037e
+data8 0xd17a2ca133f78572, 0xd1bd4a80301c5715
+data8 0xd1f71682b2fa4575, 0xd23ad555f773f059
+data8 0xd2752c7039a5bf73, 0xd2b98ee008c06b59
+data8 0xd2f4735ffd700280, 0xd32f99ed6d9ac0e1
+data8 0xd374f0666c75d51c, 0xd3b0a7d13618e4a1
+data8 0xd3eca2ea53bcec0c, 0xd428e23874f13a17
+data8 0xd46f82fe293bc6d3, 0xd4ac57e9b7186420
+data8 0xd4e972becb04e8b8, 0xd526d40a7a9b43a3
+data8 0xd5647c5b73917370, 0xd5a26c4201bd6d13
+data8 0xd5e0a45015350a7e, 0xd614b539c6194104
+data8 0xd6537310e224283f, 0xd6927ab62244c917
+data8 0xd6d1ccc1fc4ef4b7, 0xd71169cea98fdded
+data8 0xd746a66a5bc9f6d9, 0xd786ce8f0fae5317
+data8 0xd7bc7ff214c4e75a, 0xd7fd35467a517ed1
+data8 0xd83e38838648d815, 0xd874a1db598b8951
+data8 0xd8ab42205b80edaf, 0xd8ed1849d202f965
+data8 0xd92432bd5a173685, 0xd9669ca45b03c23e
+data8 0xd99e3327cf89574e, 0xd9d602b19b100466
+data8 0xda0e0ba86c096841, 0xda5195fcdb1c3dce
+data8 0xda8a1eb87a491f6c, 0xdac2e230b91c3f84
+data8 0xdafbe0d0b66aea30, 0xdb351b04a8fafced
+data8 0xdb6e9139e33cdd8e, 0xdba843ded7151ea1
+data8 0xdbe2336319b61fc8, 0xdc1c60376789fa68
+data8 0xdc56cacda82d0cd5, 0xdc917398f2797814
+data8 0xdccc5b0d90a3e628, 0xdd0781a10469f0f2
+data8 0xdd42e7ca0b52838f, 0xdd729ad01c69114d
+data8 0xddae749c001fbf5e, 0xddea8f50a51c69b1
+data8 0xde26eb69a0f0f111, 0xde576480262399bc
+data8 0xde943789645933c8, 0xded14d58139a28af
+data8 0xdf025c00bbf2b5c7, 0xdf3feb44d723a713
+data8 0xdf715bc16c159be0, 0xdfaf66240e29cda8
+data8 0xdfe139cbf6e19bdc, 0xe01fc0fe94d9fc52
+data8 0xe051f92ffcc0bd60, 0xe090feec9c9a06ac
+data8 0xe0c39d0c9ff862d6, 0xe0f668eeb99f188d
+data8 0xe1362890eb663139, 0xe1695c7212aecbaa
+data8 0xe19cbf0391bbbbe9, 0xe1d050901c531e85
+data8 0xe2110903b4f4047a, 0xe2450559b4d80b6d
+data8 0xe27931a231554ef3, 0xe2ad8e2ac3c5b04b
+data8 0xe2e21b41b9694cce, 0xe316d93615862714
+data8 0xe3590bd86a0d30f9, 0xe38e38e38e38e38e
+data8 0xe3c397d1e6db7839, 0xe3f928f5953feb9e
+data8 0xe42eeca17c62886c, 0xe464e32943446305
+data8 0xe49b0ce15747a8a2, 0xe4d16a1eee94e9d4
+data8 0xe4fa52107353f67d, 0xe5310a471f4d2dc3
+data8 0xe567f6f1c2b9c224, 0xe59f18689a9e4c9a
+data8 0xe5d66f04b8a68ecf, 0xe60dfb2005c192e9
+data8 0xe645bd1544c7ea51, 0xe66fb21b505b20a0
+data8 0xe6a7d32af4a7c59a, 0xe6e02b129c6a5ae4
+data8 0xe70a9136a7403039, 0xe74349fb2d92a589
+data8 0xe77c3a9c86ed7d42, 0xe7a713f88151518a
+data8 0xe7e067453317ed2b, 0xe819f37a81871bb5
+data8 0xe8454236bfaeca14, 0xe87f32f24c3fc90e
+data8 0xe8aacd8688892ba6, 0xe8e523fd32f606f7
+data8 0xe9110b5311407927, 0xe94bc8bf0c108fa3
+data8 0xe977fdc439c2ca3c, 0xe9b3236528fc349e
+data8 0xe9dfa70b745ac1b4, 0xea1b36268d0eaa38
+data8 0xea480963fd394197, 0xea84034425f27484
+data8 0xeab12713138dd1cc, 0xeade6db73a5e503b
+data8 0xeb1b0268343b121b, 0xeb489b0b2bdb5f14
+data8 0xeb765721e85f03d0, 0xebb389645f222f62
+data8 0xebe198f090607e0c, 0xec0fcc9321024509
+data8 0xec3e247da8b82f61, 0xec7c27d21321c9f7
+data8 0xecaad5278824e453, 0xecd9a76d097d4e77
+data8 0xed089ed5dcd99446, 0xed37bb95add09a1c
+data8 0xed76c70508f904b6, 0xeda63bb05e7f93c6
+data8 0xedd5d661daed2dc4, 0xee05974eef86b903
+data8 0xee357ead791fc670, 0xee658cb3c134a463
+data8 0xee95c1987f080211, 0xeec61d92d8c4314f
+data8 0xeef6a0da64a014ac, 0xef274ba72a07c811
+data8 0xef581e31a2c91260, 0xef8918b2bc43aec6
+data8 0xefba3b63d89d7cbf, 0xefeb867ecffaa607
+data8 0xf01cfa3df1b9c9fa, 0xf04e96dc05b43e2d
+data8 0xf0805c944d827454, 0xf0b24ba285c495cb
+data8 0xf0e46442e76f6569, 0xf116a6b2291d7896
+data8 0xf1383fa9e9b5b381, 0xf16ac84f90083b9b
+data8 0xf19d7b686dcb03d7, 0xf1d0593311db1757
+data8 0xf20361ee8f1c711e, 0xf23695da7de51d3f
+data8 0xf258d095e465cc35, 0xf28c4d0bfc982b34
+data8 0xf2bff55eb3f0ea71, 0xf2f3c9cf9884636e
+data8 0xf31670135ab9cc0f, 0xf34a8e9f0b54cdfb
+data8 0xf37ed9fa6b8add3f, 0xf3a1cfe884ef6bb6
+data8 0xf3d66689dcc8e8d3, 0xf40b2ab069d5c96a
+data8 0xf42e718b90c8bc16, 0xf463822a0a3b4b00
+data8 0xf498c1076015faf8, 0xf4bc5a19a33990b5
+data8 0xf4f1e6a7d6f5425f, 0xf527a232cf6be334
+data8 0xf54b8ecdcda90851, 0xf5819949c7ad87b4
+data8 0xf5a5bac9213b48a9, 0xf5dc1501f324a812
+data8 0xf6006bee86b5589e, 0xf63716b2fa067fa4
+data8 0xf66df22fb6132b9c, 0xf6929fb98225deb1
+data8 0xf6c9cd13021e3fea, 0xf6eeb177472cedae
+data8 0xf713abf4cb0b3afb, 0xf74b4d5333684ef1
+data8 0xf7707f75a72f8e94, 0xf7a874b97927af44
+data8 0xf7cddf140aedf1d8, 0xf806291bacb7f7a9
+data8 0xf82bcc43b92eafef, 0xf8646bf0defb759e
+data8 0xf88a487dfc3ff5f7, 0xf8b03c2b46cdc17f
+data8 0xf8e95541c152ae7a, 0xf90f832c2700c160
+data8 0xf935c88e0c7f419b, 0xf96f5cd84fd86873
+data8 0xf995dd53ebdd9d6d, 0xf9bc75a034436a41
+data8 0xf9f686f26d5518de, 0xfa1d5b39b910a8c5
+data8 0xfa4447acc4ecbfd2, 0xfa7ed7e51e6fdfb4
+data8 0xfaa601394d49a1a0, 0xfacd431644ce0e40
+data8 0xfaf49d96f7a75909, 0xfb2fd3c65e562fd5
+data8 0xfb576c5762024805, 0xfb7f1debc22c4040
+data8 0xfba6e89f32d0190a, 0xfbe2c803a0894893
+data8 0xfc0ad1ff0ed9ecf0, 0xfc32f57bdfbcbe7f
+data8 0xfc5b32968f99b21c, 0xfc83896bc861ab08
+data8 0xfcabfa1861ed4815, 0xfce8d3cea7d3163e
+data8 0xfd118595143ee273, 0xfd3a519943d4865a
+data8 0xfd6337f8e1ae5a4b, 0xfd8c38d1c8e927eb
+data8 0xfdb5544205095a53, 0xfdde8a67d2613531
+data8 0xfe07db619e781611, 0xfe460768d80bf758
+data8 0xfe6f9bfb06cd32f6, 0xfe994bcd3d14fcc2
+data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
+data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
+data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
+data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
+ASM_SIZE_DIRECTIVE(T_table)
+
+// D_table: single-precision correction terms D, one per T_table entry
+// (T_hi * (1 + D) = 1/cbrt(y), see the overview at the top of this file).
+// 768 entries = 3 (remainder of exponent/3) x 256 (8-bit significand index);
+// each 4-byte entry is read with ldfs.
+D_table:
+ASM_TYPE_DIRECTIVE(D_table,@object)
+data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
+data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
+data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
+data4 0x9f332aaa, 0x1dc92a84, 0x1f73fb7b, 0x1e32f100
+data4 0x9ea636f5, 0x9f6c3353, 0x9f405552, 0x1f33fd97
+data4 0x1e975291, 0x9e59a11e, 0x1e47b0ba, 0x9d8ad33e
+data4 0x1ea51bf6, 0x1f25d782, 0x9ecf534d, 0x1f55436f
+data4 0x1d0975e4, 0x9f0633a1, 0x1f3e840a, 0x1f523a4c
+data4 0x9f53cbbc, 0x9c8b5661, 0x9f6bc8eb, 0x1f4f6c7b
+data4 0x9ed9b376, 0x9f5b30b6, 0x1f64fa5e, 0x1cbcc3e0
+data4 0x1f343548, 0x1f62a6a2, 0x9f336abb, 0x9f1d15af
+data4 0x1f476c83, 0x1ea86421, 0x1f33b2cf, 0x9e8f1348
+data4 0x1f6fa829, 0x9f30ee3a, 0x9ebd6146, 0x1f2db598
+data4 0x1ef9600d, 0x1f5b1427, 0x9edd741b, 0x1f51ef4e
+data4 0x9f1aa57d, 0x9ee9b5e0, 0x9f17ecd7, 0x1ead71ff
+data4 0x1f6c910e, 0x9e1837df, 0x9f0f17d9, 0x9e8350dd
+data4 0x9d292f1b, 0x9e33b3ab, 0x9d6f0fe8, 0x9ed8c7cc
+data4 0x9ec598c8, 0x9d56758c, 0x1e090c1e, 0x9ed4b941
+data4 0x9f1fc4cf, 0x1f63513a, 0x9edd0abc, 0x1e3924dd
+data4 0x1f60d56f, 0x1ea84424, 0x9e88f4fb, 0x1f205c09
+data4 0x1ec9ae4e, 0x1d2d5738, 0x9f2c9f6d, 0x1e0765c2
+data4 0x1e8bbdd7, 0x9f16d9f1, 0x9ea62627, 0x1f13904c
+data4 0x1e566ab8, 0x9dca3d1a, 0x9e91f2a1, 0x9f14641c
+data4 0x9f278946, 0x1f490c1e, 0x1f575eb6, 0x1f50b3fd
+data4 0x9da32efb, 0x1ea95e59, 0x9e41e058, 0x9eada15f
+data4 0x9e4fe66c, 0x1f3abc98, 0x1f1b8d1e, 0x9ece97e4
+data4 0x1d188aed, 0x9e89b6ee, 0x1f287478, 0x9e8a161a
+data4 0x1e4749f7, 0x9e68084a, 0x1e867f33, 0x9f462b63
+data4 0x1db30792, 0x1f59a767, 0x9d1da4ae, 0x9f472a33
+data4 0x1d1e91cd, 0x9f414824, 0x9f473d4f, 0x1f4b5783
+data4 0x9f5b04b8, 0x9f5c205b, 0x1f309617, 0x9f0d6852
+data4 0x9d96a609, 0x9f0965c2, 0x9e23f467, 0x9f089884
+data4 0x9ec71458, 0x9ed6e955, 0x1e5e8691, 0x1f5b2bbc
+data4 0x9f128268, 0x1ed40f5b, 0x1dc430ce, 0x1f345986
+data4 0x1d778f72, 0x1e9b11d6, 0x9f5a40be, 0x9e07f61a
+data4 0x9ed641a7, 0x9f334787, 0x1e952fd0, 0x1edeb5e2
+data4 0x9e9f3eb1, 0x9e379fd9, 0x1f13102a, 0x9e5e80e1
+data4 0x1c757944, 0x1dae2260, 0x1f183ab7, 0x1e55d576
+data4 0x9e6bb99f, 0x9f52d7cb, 0x9e73a0f5, 0x1d4e1d14
+data4 0x9dd05b53, 0x1f2261e4, 0x9d4ee73d, 0x1ede515e
+data4 0x1f22a573, 0x9ecac348, 0x1e6a2ac0, 0x1e2787d2
+data4 0x9eb64b87, 0x1f0c69c6, 0x9f470a01, 0x9d7c1686
+data4 0x1e468ebe, 0x9f21ee2f, 0x9ee52116, 0x9e20f715
+data4 0x1ed18533, 0x9f005b38, 0x9f20cb95, 0x1da72967
+data4 0x1f1ba5d7, 0x1e2f8b16, 0x9c794f96, 0x9ca74ea3
+data4 0x1f410555, 0x9eff2b96, 0x1ce8f0b1, 0x1f0cee77
+data4 0x1f191edd, 0x9ed5fcbc, 0x1f30f242, 0x9e0ad369
+data4 0x1ed8f3c8, 0x1f52bb0e, 0x9e9ce408, 0x1f18907f
+data4 0x9ecdad40, 0x9e8af91d, 0x1d46698a, 0x9f4b93d6
+data4 0x9f3f5d33, 0x1e2e52f7, 0x9f13aeec, 0x9f3b1969
+data4 0x1f0996f4, 0x9f2a03df, 0x1e264767, 0x1f3ab1fb
+data4 0x9f3193c9, 0x9f21ce22, 0x9eab624c, 0x9ecd8fb1
+data4 0x1eaf9a85, 0x1f0c6a2c, 0x1eecbe61, 0x1f3fead9
+data4 0x1f1d3a29, 0x1e9099ce, 0x1eadd875, 0x1e4dbfb8
+data4 0x9dc640d2, 0x1f413680, 0x9f3f57b3, 0x1dfa1553
+data4 0x1ec71c6b, 0x1e00cc00, 0x9f271e55, 0x1e5a88bb
+data4 0x1f46cc2b, 0x1ee80ff9, 0x9e29c6f3, 0x1f15e229
+data4 0x9ea83d66, 0x1f37408e, 0x9dacb66e, 0x1e6f6259
+data4 0x9f106973, 0x1dd4e5ac, 0x1cbfdcc8, 0x9f231c9f
+data4 0x9e8677e4, 0x9e9e695a, 0x1efd782b, 0x9dd26959
+data4 0x9e80af69, 0x1f386fb3, 0x1f022e8c, 0x9e839967
+data4 0x1ce6796f, 0x1e4c22c2, 0x1e57ef24, 0x1e919804
+data4 0x9d7ea090, 0x1e40140a, 0x1f261b46, 0x1db75be2
+data4 0x1f145019, 0x9e3102b9, 0x9e22507b, 0x1eae813c
+data4 0x1f117e97, 0x1f282296, 0x1f3814b3, 0x1e17977b
+data4 0x1f39d6ff, 0x9f1c81b9, 0x9eb5bcad, 0x1f0f596e
+data4 0x1e757fd5, 0x9f090daa, 0x9f2532fc, 0x9eebafbb
+data4 0x1f086556, 0x9eeedde8, 0x9f32e174, 0x1e33c030
+data4 0x1f1f145a, 0x1e6e556c, 0x1e419ffb, 0x9eb6019a
+data4 0x9e872a2e, 0x1e113136, 0x1e93096f, 0x1f39be40
+data4 0x1f1665ad, 0x9db81d7d, 0x9cd29091, 0x1e3f4af7
+data4 0x9f23176c, 0x9eccf9b3, 0x1f34fc6c, 0x9ed36894
+data4 0x1ef08e06, 0x9f3b46bb, 0x9f2c850b, 0x1f1565a4
+data4 0x1e887bc3, 0x1e92629c, 0x9f11ac9e, 0x9e5579f3
+data4 0x1e4d5790, 0x9ee1c3d1, 0x9e916aec, 0x9eb8d9b8
+data4 0x1db46105, 0x1e168663, 0x1f26a942, 0x9f0f0383
+data4 0x9f079032, 0x9ecae1d8, 0x1ed3b34c, 0x9edc5ee6
+data4 0x9e8a75a7, 0x1f3c3de2, 0x9ee5041e, 0x1f08c727
+data4 0x1d02d7ae, 0x9f36adda, 0x9ef9a857, 0x9ef5cb3a
+data4 0x9eee73da, 0x9da5d629, 0x1e0e99be, 0x1e5159b9
+data4 0x1f2eac89, 0x9e8eedc5, 0x1dd0ec90, 0x1f229aff
+data4 0x1ed9c3e6, 0x1e95c55a, 0x9f0c24e4, 0x1e8afed6
+data4 0x1e599a96, 0x1e881b21, 0x1eab84b9, 0x9ba2bb0e
+data4 0x9e33ab10, 0x1f1710b5, 0x1ebfa271, 0x9e90bbc5
+data4 0x9f32515b, 0x9b32aae8, 0x1eda455c, 0x1da8186e
+data4 0x9e8917ff, 0x1ec4d08e, 0x1c90069d, 0x9f2f1d29
+data4 0x9ecee86d, 0x9f234d1f, 0x1f370724, 0x1da87496
+data4 0x1e7959f0, 0x9e8ada34, 0x1f1c7f6f, 0x1edd576b
+data4 0x9de91e8b, 0x1ec4ef89, 0x1f32078a, 0x1e9925e2
+data4 0x9d8eeccb, 0x9ea3d011, 0x1f231fdf, 0x9f1dbdfa
+data4 0x1e7507a3, 0x1ec42614, 0x9e8693cb, 0x9ec68398
+data4 0x1d5b05fb, 0x1de32119, 0x9f003429, 0x9ec16d92
+data4 0x9f095315, 0x9f119d2c, 0x9ed0c984, 0x9f090662
+data4 0x9e59aa1f, 0x9ed4e64a, 0x9f2798a7, 0x9f23624d
+data4 0x1e0467d9, 0x1f22e7e7, 0x1e915256, 0x9cb4df70
+data4 0x9e6f687c, 0x9e3c35e5, 0x9e5757ab, 0x9f031fa1
+data4 0x1f25bff7, 0x1f0e58c2, 0x1ef3ce04, 0x1f002ecb
+data4 0x9ebdc836, 0x9ed657dd, 0x9f149441, 0x9e8544b2
+data4 0x1cd8ff1e, 0x1e9bb463, 0x1eaa1c5c, 0x1f200c1a
+data4 0x1edbfbaf, 0x1f18724d, 0x9ed63c22, 0x9f08e045
+data4 0x1f13ad07, 0x9e949311, 0x9f0c50d4, 0x1e824516
+data4 0x1d5e52ba, 0x1d583fbd, 0x1e3b60a9, 0x9effe6d3
+data4 0x1f0d0508, 0x1f00be77, 0x9e404bfa, 0x9e1ca381
+data4 0x9f084dd8, 0x9e6db85d, 0x1db698e4, 0x9ebd1871
+data4 0x9ecc2679, 0x1ee68442, 0x1edb1050, 0x9dbc96a4
+data4 0x9f27c1f4, 0x1c99b756, 0x1eb4400a, 0x9f24390a
+data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2
+data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa
+data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3
+data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8
+data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912
+data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6
+data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0
+data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6
+data4 0x1eec61e2, 0x1ef1758b, 0x9ee880a3, 0x1ed269d7
+data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37
+data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f
+data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1
+data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb
+data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36
+data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc
+data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b
+data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5
+data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe
+data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d
+data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57
+data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e
+data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b
+data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab
+data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d
+data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748
+data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225
+data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109
+data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b
+data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae
+data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf
+data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd
+data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9
+data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1
+data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991
+data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48
+data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 0x1e3e5d26
+data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66
+data4 0x1e29e9af, 0x9e96f923, 0x9e74f11d, 0x9f1474da
+data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2
+data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e
+data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304
+data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01
+data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3
+data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6
+data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0
+data4 0x1ef603f7, 0x1d5e52ba, 0x1c477d1b, 0x9e955cd8
+data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e
+data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff
+data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114
+data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce
+data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745
+data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2
+data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0
+data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f
+data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640
+data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817
+data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059
+data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6
+data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821
+data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944
+data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93
+data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7
+data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19
+data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e
+data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3
+data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed
+data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8
+data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030
+data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c
+data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7
+data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5
+data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a
+data4 0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43
+data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978
+data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3
+data4 0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6
+data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849
+data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85
+data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7
+data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107
+data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f
+data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249
+data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
+data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
+data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
+data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
+ASM_SIZE_DIRECTIVE(D_table)
+
+
+
+
+
+.align 32
+.global cbrtl#
+
+.section .text
+.proc cbrtl#
+.align 32
+cbrtl:
+// long double cbrtl(long double a): the argument is read from f8 and the result is left in f8.
+// Inputs that are not normal/denormal, or whose significand is zero, return a*1 early (p12).
+{ .mfi
+ getf.sig r3=f8
+ // p12 = input is not a normal/denormal number, p7 = it is (only those continue)
+(p0) fclass.nm.unc p12,p7 = f8, 0x1b
+ // r2 = address of the linkage-table entry for poly_coeffs (C_1...C_6 followed by T_table)
+ addl r2 = @ltoff(poly_coeffs), gp;;
+}
+{.mfi
+ // r29=2/3*bias -63=0xaaaa-0x3f=0xaa6b
+ mov r29=0xaa6b
+ // f14 = normalized a
+ fma.s1 f14=f8,f1,f0
+ // r27 = address of the linkage-table entry for D_table
+ addl r27 = @ltoff(D_table), gp;;
+}
+{.mib
+ nop.m 0
+ (p7) cmp.eq p12,p0=r3,r0
+ nop.b 0;;
+}
+{.mfb
+ // load start address for C_1...C_6 followed by T_table
+ ld8 r2=[r2]
+ (p12) fma.s0 f8=f8,f1,f0
+ (p12) br.ret.spnt b0;;
+}
+{.mmf
+ // load C_1 (r2 is post-incremented by 16)
+ ldfe f7=[r2],16
+ // load start address of D table
+ ld8 r27=[r27]
+ // y=frcpa(a); p6 predicates the rest of the computation
+ frcpa.s0 f8,p6=f1,f8;;
+}
+{.mmi
+ // load C_2
+ ldfe f9=[r2],16;;
+ // load C_3, C_4
+ ldfpd f10,f11=[r2],16
+ nop.i 0;;
+}
+{.mmi
+ // get normalized significand
+ getf.sig r23=f14
+ // get sign and exponent field
+ getf.exp r24=f14
+ mov r25=0x20000;;
+}
+{.mii
+ // get r26=sign (isolated with the 0x20000 mask in r25)
+ and r26=r24,r25
+ // eliminate leading 1 from r23=2nd table index
+ shl r23=r23,1
+ // eliminate sign from exponent (r25)
+ andcm r25=r24,r25;;
+}
+{.mfi
+ // load C_5,C_6; after this post-increment r2 has advanced 64 bytes, i.e. to T_table
+ (p6) ldfpd f12,f13=[r2],16
+ // r=1-a*y
+ (p6) fnma.s1 f6=f8,f14,f1
+ // r24=5*expon, first step of multiplying by 0x5555=(2^{16}-1)/3
+ shladd r24=r25,2,r25;;
+}
+{.mib
+ // r30=(5*expon)*16=80*expon
+ shladd r30=r24,4,r0
+ // r28=3*exponent
+ shladd r28=r25,1,r25
+ nop.b 0;;
+}
+{.mmi
+ // r28=6*exponent
+ shladd r28=r28,1,r0
+ // r24=17*(5*expon)=(0x55)*expon
+ add r24=r24,r30
+ // r23=2nd table index (8 bits)
+ shr.u r23=r23,56;;
+}
+{.mmi
+ // adjust T_table pointer by 2nd index (8 bytes per entry)
+ shladd r2=r23,3,r2
+ // adjust D_table pointer by 2nd index (4 bytes per entry)
+ shladd r27=r23,2,r27
+ // r30=(0x55*expon)*2^8=(0x5500)*expon
+ shl r30=r24,8;;
+}
+{.mmi
+ // r24=(0x5555)*expon=expon*(2^16-1)/3
+ add r24=r24,r30;;
+ // r24=expon*(2^20+2)/3=expon*0x55556
+ shladd r24=r24,4,r28
+ nop.i 0;;
+}
+{.mii
+ nop.m 0
+ // r24=floor(expon/3)
+ shr.u r24=r24,20
+ nop.i 0;;
+}
+{.mmi
+ nop.m 0
+ // r28=3*floor(expon/3)
+ shladd r28=r24,1,r24
+ // bias exponent: r29 supplies the remaining 2/3*bias (minus 63)
+ add r24=r29,r24;;
+}
+{.mmi
+ // get remainder of exponent/3
+ sub r25=r25,r28;;
+ // add sign to exponent
+ or r24=r24,r26
+ // remainder <<=8 (each remainder value selects a run of 256 table entries)
+ shl r25=r25,8;;
+}
+{.mfi
+ // adjust D_table pointer by 1st index
+ shladd r27=r25,2,r27
+ // P_1=C_1+C_2*r
+ (p6) fma.s1 f7=f9,f6,f7
+ // adjust T_table pointer by 1st index
+ shladd r2=r25,3,r2
+}
+{.mfi
+ // f14=sign*2^{floor(expon/3)}, with the -63 from r29 folded into the exponent
+ (p6) setf.exp f14=r24
+ // f9=r^2=r*r (f9 held C_2, which P_1 above has already read)
+ (p6) fma.s1 f9=f6,f6,f0
+ nop.i 0;;
+}
+{.mfi
+ // load D
+ (p6) ldfs f15=[r27]
+ // P_2=C_3+C_4*r
+ (p6) fma.s1 f10=f11,f6,f10
+ nop.i 0
+}
+{.mfi
+ // load T (overwrites y in f8, which is no longer needed)
+ (p6) ldf8 f8=[r2]
+ // P_3=C_5+C_6*r
+ (p6) fma.s1 f12=f13,f6,f12
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P_4=D-r*P_1
+ (p6) fnma.s1 f15=f6,f7,f15
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f6=r^3=r*r^2
+ (p6) fma.s1 f6=f6,f9,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P_5=P_2+r^2*P_3
+ (p6) fma.s1 f10=f9,f12,f10
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // T=T*(sign*2^{exponent/3})
+ (p6) fma.s1 f8=f8,f14,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P=P_4-r^3*P_5
+ (p6) fnma.s1 f6=f6,f10,f15
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ // result=T+T*P
+ (p6) fma.s0 f8=f8,f6,f8
+ br.ret.sptk b0;;
+}
+.endp cbrtl
+ASM_SIZE_DIRECTIVE(cbrtl)
diff --git a/sysdeps/ia64/fpu/s_ceil.S b/sysdeps/ia64/fpu/s_ceil.S
new file mode 100644
index 0000000..58057c8
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ceil.S
@@ -0,0 +1,249 @@
+.file "ceil.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+
+#include "libm_support.h"
+
+.align 32
+.global ceil#
+
+.section .text
+.proc ceil#
+.align 32
+
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+
+// API
+//==============================================================
+// double ceil(double x)
+
+// general input registers:
+
+ceil_GR_FFFF = r14 // -1 (all ones), copied into the significand of CEIL_FFFF
+ceil_GR_signexp = r15 // sign and exponent field of the input (getf.exp)
+ceil_GR_exponent = r16 // ceil_GR_signexp masked with ceil_GR_expmask
+ceil_GR_expmask = r17 // 0x1FFFF
+ceil_GR_bigexp = r18 // exponent at or above which the input is already an integer
+
+
+// predicate registers used:
+
+// p6 ==> Input is NaN, infinity, zero
+// p7 ==> Input is denormal
+// p8 ==> Input is <0
+// p9 ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= ceil_GR_bigexp)
+// p11 ==> Input is not a large integer (exponent < ceil_GR_bigexp)
+// p12 ==> Input is a smaller integer (trunc(x) == x)
+// p13 ==> Input is not an integer (trunc(x) != x), so inexact must be set
+// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
+
+
+// floating-point registers used:
+
+CEIL_SIGNED_ZERO = f7 // zero carrying the sign of the input
+CEIL_NORM_f8 = f9 // normalized copy of the input
+CEIL_FFFF = f10 // all-ones significand, squared to raise inexact
+CEIL_INEXACT = f11 // scratch destination of the inexact-raising multiply
+CEIL_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
+CEIL_INT_f8 = f13 // trunc(x) as an integer
+CEIL_adj = f14 // 1.0 or 0.0, added to trunc(x)
+CEIL_MINUS_ONE = f15 // -1.0
+
+// Overview of operation
+//==============================================================
+
+// double ceil(double x)
+// Return an integer value (represented as a double) that is the smallest
+// value not less than x
+// This is x rounded toward +infinity to an integral value.
+// Inexact is set if x != ceil(x)
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+ceil:
+// double ceil(double x): x arrives in f8 and the result is written back to f8.
+{ .mfi
+ getf.exp ceil_GR_signexp = f8 // sign and exponent field of x
+ fcvt.fx.trunc.s1 CEIL_INT_f8 = f8 // trunc(x) as an integer
+ addl ceil_GR_bigexp = 0x10033, r0 // register-format exponent of 2^52
+}
+{ .mfi
+ addl ceil_GR_FFFF = -1,r0 // all ones
+ fcmp.lt.s1 p8,p9 = f8,f0 // p8: x < 0, p9: otherwise
+ mov ceil_GR_expmask = 0x1FFFF ;; // 17 one bits
+}
+
+// p7 ==> denorm
+{ .mfi
+ setf.sig CEIL_FFFF = ceil_GR_FFFF // only used below to raise inexact
+ fclass.m p7,p0 = f8, 0x0b
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm CEIL_NORM_f8 = f8 // normalized copy of x
+ nop.i 999 ;;
+}
+
+// Form 0 with sign of input in case negative zero is needed
+{ .mfi
+ nop.m 999
+ fmerge.s CEIL_SIGNED_ZERO = f8, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fsub.s1 CEIL_MINUS_ONE = f0, f1 // -1.0, lower bound of the (-1,0) special case
+ nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO; p10 ==> any other input
+{ .mfb
+ nop.m 999
+ fclass.m p6,p10 = f8, 0xe7
+(p7) br.cond.spnt L(CEIL_DENORM) ;; // redo exponent and trunc(x) on the normalized value
+}
+
+L(CEIL_COMMON):
+.pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+// If x>=0 (p9), adjustment is 1.0
+// If x<0 (p8), adjustment is 0.0
+{ .mfi
+ and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
+(p9) fadd.s1 CEIL_adj = f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fadd.s1 CEIL_adj = f0,f0
+ nop.i 999 ;;
+}
+// Non-special input: p10 = exponent >= bigexp, p11 = below. Special input (p6): x is returned.
+{ .mfi
+(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
+(p6) fnorm.d f8 = f8
+ nop.i 999 ;;
+}
+// p11: convert trunc(x) back to floating point
+{ .mfi
+ nop.m 999
+(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
+ nop.i 999 ;;
+}
+// p10: x is already an integer, so the result is x itself
+{ .mfi
+ nop.m 999
+(p10) fnorm.d f8 = CEIL_NORM_f8
+ nop.i 999 ;;
+}
+
+// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
+ nop.i 999 ;;
+}
+// p14: clear p11 so the trunc(x)+adjustment path is skipped, return -0, raise inexact
+{ .mfi
+(p14) cmp.ne p11,p0 = r0,r0
+(p14) fnorm.d f8 = CEIL_SIGNED_ZERO
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+ nop.i 999 ;;
+}
+// p11: result = trunc(x) + adjustment, then p12/p13 = whether trunc(x) equals x
+{ .mfi
+ nop.m 999
+(p11) fadd.d f8 = CEIL_FLOAT_INT_f8,CEIL_adj
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
+ nop.i 999 ;;
+}
+
+// Set inexact if result not equal to input
+{ .mfi
+ nop.m 999
+(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+ nop.i 999
+}
+// Set result to input if integer (overrides the trunc(x)+adjustment value)
+{ .mfb
+ nop.m 999
+(p12) fnorm.d f8 = CEIL_NORM_f8
+ br.ret.sptk b0 ;;
+}
+
+// Here if input denorm: redo sign/exponent and trunc(x) from the normalized copy, then rejoin
+L(CEIL_DENORM):
+{ .mfb
+ getf.exp ceil_GR_signexp = CEIL_NORM_f8
+ fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
+ br.cond.sptk L(CEIL_COMMON) ;;
+}
+
+.endp ceil
+ASM_SIZE_DIRECTIVE(ceil)
diff --git a/sysdeps/ia64/fpu/s_ceilf.S b/sysdeps/ia64/fpu/s_ceilf.S
new file mode 100644
index 0000000..2636e85
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ceilf.S
@@ -0,0 +1,249 @@
+.file "ceilf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+
+#include "libm_support.h"
+
+.align 32
+.global ceilf#
+
+.section .text
+.proc ceilf#
+.align 32
+
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+
+// API
+//==============================================================
+// float ceilf(float x)
+
+// general input registers:
+
+ceil_GR_FFFF = r14 // -1 (all ones), copied into the significand of CEIL_FFFF
+ceil_GR_signexp = r15 // sign and exponent field of the input (getf.exp)
+ceil_GR_exponent = r16 // ceil_GR_signexp masked with ceil_GR_expmask
+ceil_GR_expmask = r17 // 0x1FFFF
+ceil_GR_bigexp = r18 // exponent at or above which the input is already an integer
+
+
+// predicate registers used:
+
+// p6 ==> Input is NaN, infinity, zero
+// p7 ==> Input is denormal
+// p8 ==> Input is <0
+// p9 ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= ceil_GR_bigexp)
+// p11 ==> Input is not a large integer (exponent < ceil_GR_bigexp)
+// p12 ==> Input is a smaller integer (trunc(x) == x)
+// p13 ==> Input is not an integer (trunc(x) != x), so inexact must be set
+// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
+
+
+// floating-point registers used:
+
+CEIL_SIGNED_ZERO = f7 // zero carrying the sign of the input
+CEIL_NORM_f8 = f9 // normalized copy of the input
+CEIL_FFFF = f10 // all-ones significand, squared to raise inexact
+CEIL_INEXACT = f11 // scratch destination of the inexact-raising multiply
+CEIL_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
+CEIL_INT_f8 = f13 // trunc(x) as an integer
+CEIL_adj = f14 // 1.0 or 0.0, added to trunc(x)
+CEIL_MINUS_ONE = f15 // -1.0
+
+// Overview of operation
+//==============================================================
+
+// float ceilf(float x)
+// Return an integer value (represented as a float) that is the smallest
+// value not less than x
+// This is x rounded toward +infinity to an integral value.
+// Inexact is set if x != ceilf(x)
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+ceilf:
+// float ceilf(float x): x arrives in f8 and the result is written back to f8.
+{ .mfi
+ getf.exp ceil_GR_signexp = f8 // sign and exponent field of x
+ fcvt.fx.trunc.s1 CEIL_INT_f8 = f8 // trunc(x) as an integer
+ addl ceil_GR_bigexp = 0x10016, r0 // register-format exponent of 2^23
+}
+{ .mfi
+ addl ceil_GR_FFFF = -1,r0 // all ones
+ fcmp.lt.s1 p8,p9 = f8,f0 // p8: x < 0, p9: otherwise
+ mov ceil_GR_expmask = 0x1FFFF ;; // 17 one bits
+}
+
+// p7 ==> denorm
+{ .mfi
+ setf.sig CEIL_FFFF = ceil_GR_FFFF // only used below to raise inexact
+ fclass.m p7,p0 = f8, 0x0b
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm CEIL_NORM_f8 = f8 // normalized copy of x
+ nop.i 999 ;;
+}
+
+// Form 0 with sign of input in case negative zero is needed
+{ .mfi
+ nop.m 999
+ fmerge.s CEIL_SIGNED_ZERO = f8, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fsub.s1 CEIL_MINUS_ONE = f0, f1 // -1.0, lower bound of the (-1,0) special case
+ nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO; p10 ==> any other input
+{ .mfb
+ nop.m 999
+ fclass.m p6,p10 = f8, 0xe7
+(p7) br.cond.spnt L(CEIL_DENORM) ;; // redo exponent and trunc(x) on the normalized value
+}
+
+L(CEIL_COMMON):
+.pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+// If x>=0 (p9), adjustment is 1.0
+// If x<0 (p8), adjustment is 0.0
+{ .mfi
+ and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
+(p9) fadd.s1 CEIL_adj = f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fadd.s1 CEIL_adj = f0,f0
+ nop.i 999 ;;
+}
+// Non-special input: p10 = exponent >= bigexp, p11 = below. Special input (p6): x is returned.
+{ .mfi
+(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
+(p6) fnorm.s f8 = f8
+ nop.i 999 ;;
+}
+// p11: convert trunc(x) back to floating point
+{ .mfi
+ nop.m 999
+(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
+ nop.i 999 ;;
+}
+// p10: x is already an integer, so the result is x itself
+{ .mfi
+ nop.m 999
+(p10) fnorm.s f8 = CEIL_NORM_f8
+ nop.i 999 ;;
+}
+
+// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
+ nop.i 999 ;;
+}
+// p14: clear p11 so the trunc(x)+adjustment path is skipped, return -0, raise inexact
+{ .mfi
+(p14) cmp.ne p11,p0 = r0,r0
+(p14) fnorm.s f8 = CEIL_SIGNED_ZERO
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+ nop.i 999 ;;
+}
+// p11: result = trunc(x) + adjustment, then p12/p13 = whether trunc(x) equals x
+{ .mfi
+ nop.m 999
+(p11) fadd.s f8 = CEIL_FLOAT_INT_f8,CEIL_adj
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
+ nop.i 999 ;;
+}
+
+// Set inexact if result not equal to input
+{ .mfi
+ nop.m 999
+(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+ nop.i 999
+}
+// Set result to input if integer (overrides the trunc(x)+adjustment value)
+{ .mfb
+ nop.m 999
+(p12) fnorm.s f8 = CEIL_NORM_f8
+ br.ret.sptk b0 ;;
+}
+
+// Here if input denorm: redo sign/exponent and trunc(x) from the normalized copy, then rejoin
+L(CEIL_DENORM):
+{ .mfb
+ getf.exp ceil_GR_signexp = CEIL_NORM_f8
+ fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
+ br.cond.sptk L(CEIL_COMMON) ;;
+}
+
+.endp ceilf
+ASM_SIZE_DIRECTIVE(ceilf)
diff --git a/sysdeps/ia64/fpu/s_ceill.S b/sysdeps/ia64/fpu/s_ceill.S
new file mode 100644
index 0000000..443ae92
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ceill.S
@@ -0,0 +1,249 @@
+.file "ceill.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+
+#include "libm_support.h"
+
+.align 32
+.global ceill#
+
+.section .text
+.proc ceill#
+.align 32
+
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+
+// API
+//==============================================================
+// long double ceill(long double x)
+
+// general input registers:
+
+ceil_GR_FFFF = r14 // -1 (all ones), copied into the significand of CEIL_FFFF
+ceil_GR_signexp = r15 // sign and exponent field of the input (getf.exp)
+ceil_GR_exponent = r16 // ceil_GR_signexp masked with ceil_GR_expmask
+ceil_GR_expmask = r17 // 0x1FFFF
+ceil_GR_bigexp = r18 // exponent at or above which the input is already an integer
+
+
+// predicate registers used:
+
+// p6 ==> Input is NaN, infinity, zero
+// p7 ==> Input is denormal
+// p8 ==> Input is <0
+// p9 ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= ceil_GR_bigexp)
+// p11 ==> Input is not a large integer (exponent < ceil_GR_bigexp)
+// p12 ==> Input is a smaller integer (trunc(x) == x)
+// p13 ==> Input is not an integer (trunc(x) != x), so inexact must be set
+// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
+
+
+// floating-point registers used:
+
+CEIL_SIGNED_ZERO = f7 // zero carrying the sign of the input
+CEIL_NORM_f8 = f9 // normalized copy of the input
+CEIL_FFFF = f10 // all-ones significand, squared to raise inexact
+CEIL_INEXACT = f11 // scratch destination of the inexact-raising multiply
+CEIL_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
+CEIL_INT_f8 = f13 // trunc(x) as an integer
+CEIL_adj = f14 // 1.0 or 0.0, added to trunc(x)
+CEIL_MINUS_ONE = f15 // -1.0
+
+// Overview of operation
+//==============================================================
+
+// long double ceill(long double x)
+// Return an integer value (represented as a long double) that is the smallest
+// value not less than x
+// This is x rounded toward +infinity to an integral value.
+// Inexact is set if x != ceill(x)
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+ceill:
+// long double ceill(long double x): x arrives in f8 and the result is written back to f8.
+{ .mfi
+ getf.exp ceil_GR_signexp = f8 // sign and exponent field of x
+ fcvt.fx.trunc.s1 CEIL_INT_f8 = f8 // trunc(x) as an integer
+ addl ceil_GR_bigexp = 0x1003e, r0 // register-format exponent of 2^63
+}
+{ .mfi
+ addl ceil_GR_FFFF = -1,r0 // all ones
+ fcmp.lt.s1 p8,p9 = f8,f0 // p8: x < 0, p9: otherwise
+ mov ceil_GR_expmask = 0x1FFFF ;; // 17 one bits
+}
+
+// p7 ==> denorm
+{ .mfi
+ setf.sig CEIL_FFFF = ceil_GR_FFFF // only used below to raise inexact
+ fclass.m p7,p0 = f8, 0x0b
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm CEIL_NORM_f8 = f8 // normalized copy of x
+ nop.i 999 ;;
+}
+
+// Form 0 with sign of input in case negative zero is needed
+{ .mfi
+ nop.m 999
+ fmerge.s CEIL_SIGNED_ZERO = f8, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fsub.s1 CEIL_MINUS_ONE = f0, f1 // -1.0, lower bound of the (-1,0) special case
+ nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO; p10 ==> any other input
+{ .mfb
+ nop.m 999
+ fclass.m p6,p10 = f8, 0xe7
+(p7) br.cond.spnt L(CEIL_DENORM) ;; // redo exponent and trunc(x) on the normalized value
+}
+
+L(CEIL_COMMON):
+.pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+// If x>=0 (p9), adjustment is 1.0
+// If x<0 (p8), adjustment is 0.0
+{ .mfi
+ and ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
+(p9) fadd.s1 CEIL_adj = f1,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fadd.s1 CEIL_adj = f0,f0
+ nop.i 999 ;;
+}
+// Non-special input: p10 = exponent >= bigexp, p11 = below. Special input (p6): x is returned.
+{ .mfi
+(p10) cmp.ge.unc p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
+(p6) fnorm f8 = f8
+ nop.i 999 ;;
+}
+// p11: convert trunc(x) back to floating point
+{ .mfi
+ nop.m 999
+(p11) fcvt.xf CEIL_FLOAT_INT_f8 = CEIL_INT_f8
+ nop.i 999 ;;
+}
+// p10: x is already an integer, so the result is x itself
+{ .mfi
+ nop.m 999
+(p10) fnorm f8 = CEIL_NORM_f8
+ nop.i 999 ;;
+}
+
+// Is -1 < x < 0? If so, result will be -0. Special case it with p14 set.
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
+ nop.i 999 ;;
+}
+// p14: clear p11 so the trunc(x)+adjustment path is skipped, return -0, raise inexact
+{ .mfi
+(p14) cmp.ne p11,p0 = r0,r0
+(p14) fnorm f8 = CEIL_SIGNED_ZERO
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p14) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+ nop.i 999 ;;
+}
+// p11: result = trunc(x) + adjustment, then p12/p13 = whether trunc(x) equals x
+{ .mfi
+ nop.m 999
+(p11) fadd f8 = CEIL_FLOAT_INT_f8,CEIL_adj
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p12,p13 = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
+ nop.i 999 ;;
+}
+
+// Set inexact if result not equal to input
+{ .mfi
+ nop.m 999
+(p13) fmpy.s0 CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+ nop.i 999
+}
+// Set result to input if integer (overrides the trunc(x)+adjustment value)
+{ .mfb
+ nop.m 999
+(p12) fnorm f8 = CEIL_NORM_f8
+ br.ret.sptk b0 ;;
+}
+
+// Here if input denorm: redo sign/exponent and trunc(x) from the normalized copy, then rejoin
+L(CEIL_DENORM):
+{ .mfb
+ getf.exp ceil_GR_signexp = CEIL_NORM_f8
+ fcvt.fx.trunc.s1 CEIL_INT_f8 = CEIL_NORM_f8
+ br.cond.sptk L(CEIL_COMMON) ;;
+}
+
+.endp ceill
+ASM_SIZE_DIRECTIVE(ceill)
diff --git a/sysdeps/ia64/fpu/s_cos.S b/sysdeps/ia64/fpu/s_cos.S
new file mode 100644
index 0000000..cd715b4
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_cos.S
@@ -0,0 +1,3488 @@
+.file "sincos.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial revision
+// 4/02/00 Unwind support added.
+// 6/16/00 Updated tables to enforce symmetry
+// 8/31/00 Saved 2 cycles in main path, and 9 in other paths.
+// 9/20/00 The updated tables regressed to an old version, so reinstated them
+// 10/18/00 Changed one table entry to ensure symmetry
+// 1/03/01 Improved speed, fixed flag settings for small arguments.
+
+// API
+//==============================================================
+// double sin( double x);
+// double cos( double x);
+//
+// Overview of operation
+//==============================================================
+//
+// Step 1
+// ======
+// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k where k=4
+// divide x by pi/2^k.
+// Multiply by 2^k/pi.
+// nfloat = Round result to integer (round-to-nearest)
+//
+// r = x - nfloat * pi/2^k
+// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) for increased accuracy.
+// pi/2^k is stored as two numbers that when added make pi/2^k.
+// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
+//
+// x = (nfloat * pi/2^k) + r
+// r is small enough that we can use a polynomial approximation
+// and is referred to as the reduced argument.
+//
+// Step 3
+// ======
+// Take the unreduced part and remove the multiples of 2pi.
+// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
+//
+// nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
+// N * 2^(k+1)
+// nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
+// nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
+// nfloat * pi/2^k = N2pi + M * pi/2^k
+//
+//
+// Sin(x) = Sin((nfloat * pi/2^k) + r)
+// = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
+//
+// Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
+// = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
+// = Sin(Mpi/2^k)
+//
+// Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
+// = Cos(N2pi)Cos(Mpi/2^k) + Sin(N2pi)Sin(Mpi/2^k)
+// = Cos(Mpi/2^k)
+//
+// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
+//
+//
+// Step 4
+// ======
+// 0 <= M < 2^(k+1)
+// There are 2^(k+1) Sin entries in a table.
+// There are 2^(k+1) Cos entries in a table.
+//
+// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
+//
+//
+// Step 5
+// ======
+// Calculate Cos(r) and Sin(r) by polynomial approximation.
+//
+// Cos(r) = 1 + r^2 q1 + r^4 q2 + r^6 q3 + ... = Series for Cos
+// Sin(r) = r + r^3 p1 + r^5 p2 + r^7 p3 + ... = Series for Sin
+//
+// and the coefficients q1, q2, ... and p1, p2, ... are stored in a table
+//
+//
+// Calculate
+// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
+//
+// as follows
+//
+// Sm = Sin(Mpi/2^k) and Cm = Cos(Mpi/2^k)
+// rsq = r*r
+//
+//
+// P = p1 + r^2p2 + r^4p3 + r^6p4
+// Q = q1 + r^2q2 + r^4q3 + r^6q4
+//
+// rcub = r * rsq
+// Sin(r) = r + rcub * P
+// = r + r^3p1 + r^5p2 + r^7p3 + r^9p4 + ... = Sin(r)
+//
+// The coefficients are not exactly these values, but almost.
+//
+// p1 = -1/6 = -1/3!
+// p2 = 1/120 = 1/5!
+// p3 = -1/5040 = -1/7!
+// p4 = 1/362880 = 1/9!
+//
+// P = r + rcub * P
+//
+// Answer = Sm Cos(r) + Cm P
+//
+// Cos(r) = 1 + rsq Q
+// Cos(r) = 1 + r^2 Q
+// Cos(r) = 1 + r^2 (q1 + r^2q2 + r^4q3 + r^6q4)
+// Cos(r) = 1 + r^2q1 + r^4q2 + r^6q3 + r^8q4 + ...
+//
+// Sm Cos(r) = Sm(1 + rsq Q)
+// Sm Cos(r) = Sm + Sm rsq Q
+// Sm Cos(r) = Sm + s_rsq Q
+// Q = Sm + s_rsq Q
+//
+// Then,
+//
+// Answer = Q + Cm P
+
+#include "libm_support.h"
+
+// Registers used
+//==============================================================
+// general input registers:
+// r14 -> r19
+// r32 -> r45
+
+// predicate registers used:
+// p6 -> p14
+
+// floating-point registers used
+// f9 -> f15
+// f32 -> f61
+
+// Assembly macros
+//==============================================================
+sind_NORM_f8 = f9
+sind_W = f10
+sind_int_Nfloat = f11
+sind_Nfloat = f12
+
+sind_r = f13
+sind_rsq = f14
+sind_rcub = f15
+
+sind_Inv_Pi_by_16 = f32
+sind_Pi_by_16_hi = f33
+sind_Pi_by_16_lo = f34
+
+sind_Inv_Pi_by_64 = f35
+sind_Pi_by_64_hi = f36
+sind_Pi_by_64_lo = f37
+
+sind_Sm = f38
+sind_Cm = f39
+
+sind_P1 = f40
+sind_Q1 = f41
+sind_P2 = f42
+sind_Q2 = f43
+sind_P3 = f44
+sind_Q3 = f45
+sind_P4 = f46
+sind_Q4 = f47
+
+sind_P_temp1 = f48
+sind_P_temp2 = f49
+
+sind_Q_temp1 = f50
+sind_Q_temp2 = f51
+
+sind_P = f52
+sind_Q = f53
+
+sind_srsq = f54
+
+sind_SIG_INV_PI_BY_16_2TO61 = f55
+sind_RSHF_2TO61 = f56
+sind_RSHF = f57
+sind_2TOM61 = f58
+sind_NFLOAT = f59
+sind_W_2TO61_RSH = f60
+
+fp_tmp = f61
+
+/////////////////////////////////////////////////////////////
+
+sind_AD_1 = r33
+sind_AD_2 = r34
+sind_exp_limit = r35
+sind_r_signexp = r36
+sind_AD_beta_table = r37
+sind_r_sincos = r38
+
+sind_r_exp = r39
+sind_r_17_ones = r40
+
+sind_GR_sig_inv_pi_by_16 = r14
+sind_GR_rshf_2to61 = r15
+sind_GR_rshf = r16
+sind_GR_exp_2tom61 = r17
+sind_GR_n = r18
+sind_GR_m = r19
+sind_GR_32m = r19
+
+gr_tmp = r41
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+double_sind_pi: // two 16-byte entries, read in order with ldfe and a post-increment of 16
+ASM_TYPE_DIRECTIVE(double_sind_pi,@object)
+// data8 0xA2F9836E4E44152A, 0x00004001 // 16/pi (not stored here: significand loaded w/ movl + setf.sig)
+// c90fdaa22168c234
+ data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 hi
+// c4c6628b80dc1cd1 29024e088a
+ data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 lo
+ASM_SIZE_DIRECTIVE(double_sind_pi)
+
+double_sind_pq_k4: // must directly follow double_sind_pi: the same post-incremented pointer walks on into this table
+ASM_TYPE_DIRECTIVE(double_sind_pq_k4,@object)
+ data8 0x3EC71C963717C63A // P4 (close to 1/9!); each P/Q pair is fetched together with ldfpd
+ data8 0x3EF9FFBA8F191AE6 // Q4 (close to 1/8!)
+ data8 0xBF2A01A00F4E11A8 // P3 (close to -1/7!)
+ data8 0xBF56C16C05AC77BF // Q3 (close to -1/6!)
+ data8 0x3F8111111110F167 // P2 (close to 1/5!)
+ data8 0x3FA555555554DD45 // Q2 (close to 1/4!)
+ data8 0xBFC5555555555555 // P1 (close to -1/3!)
+ data8 0xBFDFFFFFFFFFFFFC // Q1 (close to -1/2!)
+ASM_SIZE_DIRECTIVE(double_sind_pq_k4)
+
+
+double_sin_cos_beta_k4:
+ASM_TYPE_DIRECTIVE(double_sin_cos_beta_k4,@object)
+data8 0x0000000000000000 , 0x00000000 // sin( 0 pi/16) S0
+data8 0x8000000000000000 , 0x00003fff // cos( 0 pi/16) C0
+
+data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin( 1 pi/16) S1
+data8 0xfb14be7fbae58157 , 0x00003ffe // cos( 1 pi/16) C1
+
+data8 0xc3ef1535754b168e , 0x00003ffd // sin( 2 pi/16) S2
+data8 0xec835e79946a3146 , 0x00003ffe // cos( 2 pi/16) C2
+
+data8 0x8e39d9cd73464364 , 0x00003ffe // sin( 3 pi/16) S3
+data8 0xd4db3148750d181a , 0x00003ffe // cos( 3 pi/16) C3
+
+data8 0xb504f333f9de6484 , 0x00003ffe // sin( 4 pi/16) S4
+data8 0xb504f333f9de6484 , 0x00003ffe // cos( 4 pi/16) C4
+
+
+data8 0xd4db3148750d181a , 0x00003ffe // sin( 5 pi/16) C3
+data8 0x8e39d9cd73464364 , 0x00003ffe // cos( 5 pi/16) S3
+
+data8 0xec835e79946a3146 , 0x00003ffe // sin( 6 pi/16) C2
+data8 0xc3ef1535754b168e , 0x00003ffd // cos( 6 pi/16) S2
+
+data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 7 pi/16) C1
+data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos( 7 pi/16) S1
+
+data8 0x8000000000000000 , 0x00003fff // sin( 8 pi/16) C0
+data8 0x0000000000000000 , 0x00000000 // cos( 8 pi/16) S0
+
+
+data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 9 pi/16) C1
+data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos( 9 pi/16) -S1
+
+data8 0xec835e79946a3146 , 0x00003ffe // sin(10 pi/16) C2
+data8 0xc3ef1535754b168e , 0x0000bffd // cos(10 pi/16) -S2
+
+data8 0xd4db3148750d181a , 0x00003ffe // sin(11 pi/16) C3
+data8 0x8e39d9cd73464364 , 0x0000bffe // cos(11 pi/16) -S3
+
+data8 0xb504f333f9de6484 , 0x00003ffe // sin(12 pi/16) S4
+data8 0xb504f333f9de6484 , 0x0000bffe // cos(12 pi/16) -S4
+
+
+data8 0x8e39d9cd73464364 , 0x00003ffe // sin(13 pi/16) S3
+data8 0xd4db3148750d181a , 0x0000bffe // cos(13 pi/16) -C3
+
+data8 0xc3ef1535754b168e , 0x00003ffd // sin(14 pi/16) S2
+data8 0xec835e79946a3146 , 0x0000bffe // cos(14 pi/16) -C2
+
+data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin(15 pi/16) S1
+data8 0xfb14be7fbae58157 , 0x0000bffe // cos(15 pi/16) -C1
+
+data8 0x0000000000000000 , 0x00000000 // sin(16 pi/16) S0
+data8 0x8000000000000000 , 0x0000bfff // cos(16 pi/16) -C0
+
+
+data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(17 pi/16) -S1
+data8 0xfb14be7fbae58157 , 0x0000bffe // cos(17 pi/16) -C1
+
+data8 0xc3ef1535754b168e , 0x0000bffd // sin(18 pi/16) -S2
+data8 0xec835e79946a3146 , 0x0000bffe // cos(18 pi/16) -C2
+
+data8 0x8e39d9cd73464364 , 0x0000bffe // sin(19 pi/16) -S3
+data8 0xd4db3148750d181a , 0x0000bffe // cos(19 pi/16) -C3
+
+data8 0xb504f333f9de6484 , 0x0000bffe // sin(20 pi/16) -S4
+data8 0xb504f333f9de6484 , 0x0000bffe // cos(20 pi/16) -S4
+
+
+data8 0xd4db3148750d181a , 0x0000bffe // sin(21 pi/16) -C3
+data8 0x8e39d9cd73464364 , 0x0000bffe // cos(21 pi/16) -S3
+
+data8 0xec835e79946a3146 , 0x0000bffe // sin(22 pi/16) -C2
+data8 0xc3ef1535754b168e , 0x0000bffd // cos(22 pi/16) -S2
+
+data8 0xfb14be7fbae58157 , 0x0000bffe // sin(23 pi/16) -C1
+data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos(23 pi/16) -S1
+
+data8 0x8000000000000000 , 0x0000bfff // sin(24 pi/16) -C0
+data8 0x0000000000000000 , 0x00000000 // cos(24 pi/16) S0
+
+
+data8 0xfb14be7fbae58157 , 0x0000bffe // sin(25 pi/16) -C1
+data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos(25 pi/16) S1
+
+data8 0xec835e79946a3146 , 0x0000bffe // sin(26 pi/16) -C2
+data8 0xc3ef1535754b168e , 0x00003ffd // cos(26 pi/16) S2
+
+data8 0xd4db3148750d181a , 0x0000bffe // sin(27 pi/16) -C3
+data8 0x8e39d9cd73464364 , 0x00003ffe // cos(27 pi/16) S3
+
+data8 0xb504f333f9de6484 , 0x0000bffe // sin(28 pi/16) -S4
+data8 0xb504f333f9de6484 , 0x00003ffe // cos(28 pi/16) S4
+
+
+data8 0x8e39d9cd73464364 , 0x0000bffe // sin(29 pi/16) -S3
+data8 0xd4db3148750d181a , 0x00003ffe // cos(29 pi/16) C3
+
+data8 0xc3ef1535754b168e , 0x0000bffd // sin(30 pi/16) -S2
+data8 0xec835e79946a3146 , 0x00003ffe // cos(30 pi/16) C2
+
+data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(31 pi/16) -S1
+data8 0xfb14be7fbae58157 , 0x00003ffe // cos(31 pi/16) C1
+
+data8 0x0000000000000000 , 0x00000000 // sin(32 pi/16) S0
+data8 0x8000000000000000 , 0x00003fff // cos(32 pi/16) C0
+ASM_SIZE_DIRECTIVE(double_sin_cos_beta_k4)
+
+.align 32
+.global sin#
+.global cos#
+#ifdef _LIBC
+.global __sin#
+.global __cos#
+#endif
+
+////////////////////////////////////////////////////////
+// There are two entry points: sin and cos
+
+
+// If from sin, p8 is true
+// If from cos, p9 is true
+
+.section .text
+.proc sin#
+#ifdef _LIBC
+.proc __sin#
+#endif
+.align 32
+
+sin:
+#ifdef _LIBC
+__sin:
+#endif
+// sin entry: build the constants shared with cos, mark the call as sin, then join L(SIND_SINCOS)
+{ .mlx
+ alloc r32=ar.pfs,1,13,0,0 // 1 input + 13 local registers
+ movl sind_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
+}
+{ .mlx
+ addl sind_AD_1 = @ltoff(double_sind_pi), gp // linkage-table slot holding the address of double_sind_pi
+ movl sind_GR_rshf_2to61 = 0x47b8000000000000 // 1.1000 2^(63+63-2)
+}
+;;
+// Fetch the table address, normalize x, and flag this entry as sin (p8 true, p9 false)
+{ .mfi
+ ld8 sind_AD_1 = [sind_AD_1] // address of double_sind_pi itself
+ fnorm sind_NORM_f8 = f8 // NOTE(review): the cos entry uses fnorm.s1 here - confirm the difference is intended
+ cmp.eq p8,p9 = r0, r0
+}
+{ .mib
+ mov sind_GR_exp_2tom61 = 0xffff-61 // exponent of scaling factor 2^-61
+ mov sind_r_sincos = 0x0 // value added to N: 0 for sin (the cos entry uses 8)
+ br.cond.sptk L(SIND_SINCOS)
+}
+;;
+
+.endp sin
+ASM_SIZE_DIRECTIVE(sin)
+
+
+.section .text
+.proc cos#
+#ifdef _LIBC
+.proc __cos#
+#endif
+.align 32
+cos:
+#ifdef _LIBC
+__cos:
+#endif
+
+{ .mlx
+ alloc r32=ar.pfs,1,13,0,0
+ movl sind_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
+}
+{ .mlx
+ addl sind_AD_1 = @ltoff(double_sind_pi), gp
+ movl sind_GR_rshf_2to61 = 0x47b8000000000000 // 1.1000 2^(63+63-2)
+}
+;;
+
+{ .mfi
+ ld8 sind_AD_1 = [sind_AD_1]
+ fnorm.s1 sind_NORM_f8 = f8
+ cmp.eq p9,p8 = r0, r0
+}
+{ .mib
+ mov sind_GR_exp_2tom61 = 0xffff-61 // exponent of scaling factor 2^-61
+ mov sind_r_sincos = 0x8
+ br.cond.sptk L(SIND_SINCOS)
+}
+;;
+
+
+////////////////////////////////////////////////////////
+// All entry points end up here.
+// If from sin, sind_r_sincos is 0 and p8 is true
+// If from cos, sind_r_sincos is 8 = 2^(k-1) and p9 is true
+// We add sind_r_sincos to N
+// Flow: inf, nan and zero inputs return early; exponents >= 0x10009 branch to the DBX callouts; everything else is reduced by pi/16 and evaluated below.
+L(SIND_SINCOS):
+
+
+// Form two constants we need
+// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
+// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
+// fcmp used to set denormal, and invalid on snans
+{ .mfi
+ setf.sig sind_SIG_INV_PI_BY_16_2TO61 = sind_GR_sig_inv_pi_by_16
+ fcmp.eq.s0 p12,p0=f8,f0
+ mov sind_r_17_ones = 0x1ffff // mask that keeps the exponent field of getf.exp and drops the sign
+}
+{ .mlx
+ setf.d sind_RSHF_2TO61 = sind_GR_rshf_2to61
+ movl sind_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+}
+;;
+
+// Form another constant
+// 2^-61 for scaling Nfloat
+// 0x10009 is register_bias + 10.
+// So if |f8| >= 2^10 = Gamma, go to DBX
+{ .mfi
+ setf.exp sind_2TOM61 = sind_GR_exp_2tom61
+ fclass.m p13,p0 = f8, 0x23 // Test for x inf
+ mov sind_exp_limit = 0x10009
+}
+;;
+
+// Load the two pieces of pi/16
+// Form another constant
+// 1.1000...000 * 2^63, the right shift constant
+{ .mmf
+ ldfe sind_Pi_by_16_hi = [sind_AD_1],16
+ setf.d sind_RSHF = sind_GR_rshf
+ fclass.m p14,p0 = f8, 0xc3 // Test for x nan
+}
+;;
+
+{ .mfi
+ ldfe sind_Pi_by_16_lo = [sind_AD_1],16
+(p13) frcpa.s0 f8,p12=f0,f0 // force qnan indef for x=inf
+ addl gr_tmp = -1,r0 // all-ones pattern, moved into fp_tmp further down
+}
+{ .mfb
+ addl sind_AD_beta_table = @ltoff(double_sin_cos_beta_k4), gp
+ nop.f 999
+(p13) br.ret.spnt b0 ;; // Exit for x=inf
+}
+
+// Start loading P, Q coefficients
+// SIN(0)
+{ .mfi
+ ldfpd sind_P4,sind_Q4 = [sind_AD_1],16
+(p8) fclass.m.unc p6,p0 = f8, 0x07 // Test for sin(0)
+ nop.i 999
+}
+{ .mfb
+ addl sind_AD_beta_table = @ltoff(double_sin_cos_beta_k4), gp // repeats the addl from the x=inf exit bundle above
+(p14) fma.d f8=f8,f1,f0 // qnan for x=nan
+(p14) br.ret.spnt b0 ;; // Exit for x=nan
+}
+
+
+// COS(0)
+{ .mfi
+ getf.exp sind_r_signexp = f8
+(p9) fclass.m.unc p7,p0 = f8, 0x07 // Test for cos(0)
+ nop.i 999
+}
+{ .mfi
+ ld8 sind_AD_beta_table = [sind_AD_beta_table]
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mmb
+ ldfpd sind_P3,sind_Q3 = [sind_AD_1],16
+ setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
+(p6) br.ret.spnt b0 ;; // sin(+/-0): return with f8 untouched
+}
+
+{ .mfb
+ and sind_r_exp = sind_r_17_ones, sind_r_signexp
+(p7) fmerge.s f8 = f1,f1 // cos(+/-0): result is f1
+(p7) br.ret.spnt b0 ;;
+}
+
+// p10 is true if we must call routines to handle larger arguments
+// p10 is true if f8 exp is >= 0x10009 (cmp.ge below)
+
+{ .mfi
+ ldfpd sind_P2,sind_Q2 = [sind_AD_1],16
+ nop.f 999
+ cmp.ge p10,p0 = sind_r_exp,sind_exp_limit
+}
+;;
+
+// sind_W = x * sind_Inv_Pi_by_16
+// Multiply x by scaled 16/pi and add large const to shift integer part of W to
+// rightmost bits of significand
+{ .mfi
+ ldfpd sind_P1,sind_Q1 = [sind_AD_1]
+ fma.s1 sind_W_2TO61_RSH = sind_NORM_f8,sind_SIG_INV_PI_BY_16_2TO61,sind_RSHF_2TO61
+ nop.i 999
+}
+{ .mbb
+(p10) cmp.ne.unc p11,p12=sind_r_sincos,r0 // p11 call __libm_cos_double_dbx
+ // p12 call __libm_sin_double_dbx
+(p11) br.cond.spnt L(COSD_DBX)
+(p12) br.cond.spnt L(SIND_DBX)
+}
+;;
+
+
+// sind_NFLOAT = Round_Int_Nearest(sind_W)
+// This is done by scaling back by 2^-61 and subtracting the shift constant
+{ .mfi
+ nop.m 999
+ fms.s1 sind_NFLOAT = sind_W_2TO61_RSH,sind_2TOM61,sind_RSHF
+ nop.i 999 ;;
+}
+
+
+// get N = (int)sind_int_Nfloat
+{ .mfi
+ getf.sig sind_GR_n = sind_W_2TO61_RSH // N is read from the significand of the shifted W
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// Add 2^(k-1) (which is in sind_r_sincos) to N
+// sind_r = -sind_Nfloat * sind_Pi_by_16_hi + x
+// sind_r = sind_r -sind_Nfloat * sind_Pi_by_16_lo
+{ .mfi
+ add sind_GR_n = sind_GR_n, sind_r_sincos
+ fnma.s1 sind_r = sind_NFLOAT, sind_Pi_by_16_hi, sind_NORM_f8
+ nop.i 999 ;;
+}
+
+
+// Get M (least k+1 bits of N)
+{ .mmi
+ and sind_GR_m = 0x1f,sind_GR_n ;;
+ nop.m 999
+ shl sind_GR_32m = sind_GR_m,5 ;; // 32*M: byte offset of entry M (Sm then Cm, 16 bytes each)
+}
+
+// Add 32*M to address of sin_cos_beta table
+{ .mmi
+ add sind_AD_2 = sind_GR_32m, sind_AD_beta_table
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ldfe sind_Sm = [sind_AD_2],16
+(p8) fclass.m.unc p10,p0=f8,0x0b // If sin, note denormal input to set uflow
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ldfe sind_Cm = [sind_AD_2]
+ fnma.s1 sind_r = sind_NFLOAT, sind_Pi_by_16_lo, sind_r
+ nop.i 999 ;;
+}
+
+// get rsq
+{ .mfi
+ nop.m 999
+ fma.s1 sind_rsq = sind_r, sind_r, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fmpy.s0 fp_tmp = fp_tmp,fp_tmp // fmpy forces inexact flag
+ nop.i 999 ;;
+}
+
+// form P and Q series
+{ .mfi
+ nop.m 999
+ fma.s1 sind_P_temp1 = sind_rsq, sind_P4, sind_P3
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 sind_Q_temp1 = sind_rsq, sind_Q4, sind_Q3
+ nop.i 999 ;;
+}
+
+// get rcube and sm*rsq
+{ .mfi
+ nop.m 999
+ fmpy.s1 sind_srsq = sind_Sm,sind_rsq
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 sind_rcub = sind_r, sind_rsq
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 sind_Q_temp2 = sind_rsq, sind_Q_temp1, sind_Q2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 sind_P_temp2 = sind_rsq, sind_P_temp1, sind_P2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 sind_Q = sind_rsq, sind_Q_temp2, sind_Q1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 sind_P = sind_rsq, sind_P_temp2, sind_P1
+ nop.i 999 ;;
+}
+
+// Get final P and Q
+{ .mfi
+ nop.m 999
+ fma.s1 sind_Q = sind_srsq,sind_Q, sind_Sm // Q = Sm + Sm*rsq*Q
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 sind_P = sind_rcub,sind_P, sind_r // P = r + r^3*P
+ nop.i 999 ;;
+}
+
+// If sin(denormal), force inexact to be set
+{ .mfi
+ nop.m 999
+(p10) fmpy.d.s0 fp_tmp = f8,f8
+ nop.i 999 ;;
+}
+
+// Final calculation
+{ .mfb
+ nop.m 999
+ fma.d f8 = sind_Cm, sind_P, sind_Q // result = Cm*P + Q, rounded to double
+ br.ret.sptk b0 ;;
+}
+.endp cos#
+ASM_SIZE_DIRECTIVE(cos#)
+
+
+
+.proc __libm_callout_1s
+__libm_callout_1s:
+L(SIND_DBX): // target of the (p12) branch in the shared sin/cos code (large-argument sine)
+.prologue
+{ .mfi
+ nop.m 0
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+;;
+// Keep gp and b0 in GR_SAVE_GP / GR_SAVE_B0 across the call; both are restored after it returns.
+{ .mfi
+ mov GR_SAVE_GP=gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+}
+
+.body
+{ .mib
+ nop.m 999
+ nop.i 999
+ br.call.sptk.many b0=__libm_sin_double_dbx# ;;
+}
+;;
+
+// Back from __libm_sin_double_dbx: restore gp, b0 and ar.pfs, then return to the original caller.
+{ .mfi
+ mov gp = GR_SAVE_GP
+ nop.f 999
+ mov b0 = GR_SAVE_B0
+}
+;;
+
+{ .mib
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0 ;;
+}
+.endp __libm_callout_1s
+ASM_SIZE_DIRECTIVE(__libm_callout_1s)
+
+
+.proc __libm_callout_1c
+__libm_callout_1c:
+L(COSD_DBX): // target of the (p11) branch in the shared sin/cos code (large-argument cosine)
+.prologue
+{ .mfi
+ nop.m 0
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+;;
+// Keep gp and b0 in GR_SAVE_GP / GR_SAVE_B0 across the call; both are restored after it returns.
+{ .mfi
+ mov GR_SAVE_GP=gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+}
+
+.body
+{ .mib
+ nop.m 999
+ nop.i 999
+ br.call.sptk.many b0=__libm_cos_double_dbx# ;;
+}
+;;
+
+// Back from __libm_cos_double_dbx: restore gp, b0 and ar.pfs, then return to the original caller.
+{ .mfi
+ mov gp = GR_SAVE_GP
+ nop.f 999
+ mov b0 = GR_SAVE_B0
+}
+;;
+
+{ .mib
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0 ;;
+}
+.endp __libm_callout_1c
+ASM_SIZE_DIRECTIVE(__libm_callout_1c)
+
+
+// ====================================================================
+// ====================================================================
+
+// These functions calculate the sin and cos for inputs
+// greater than 2^10
+// __libm_sin_double_dbx# and __libm_cos_double_dbx#
+
+// *********************************************************************
+// *********************************************************************
+//
+// Function: Combined sin(x) and cos(x), where
+//
+// sin(x) = sine(x), for double precision x values
+// cos(x) = cosine(x), for double precision x values
+//
+// *********************************************************************
+//
+// Accuracy: Within .7 ulps for 80-bit floating point values
+// Very accurate for double precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f32-f99
+//
+// General Purpose Registers:
+// r32-r43
+// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
+//
+// Predicate Registers: p6-p13
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions do not occur
+// Underflow exceptions raised when appropriate for sin
+// (No specialized error handling for this routine)
+// Inexact raised when appropriate by algorithm
+//
+// sin(SNaN) = QNaN
+// sin(QNaN) = QNaN
+// sin(inf) = QNaN
+// sin(+/-0) = +/-0
+// cos(inf) = QNaN
+// cos(SNaN) = QNaN
+// cos(QNaN) = QNaN
+// cos(0) = 1
+//
+// *********************************************************************
+//
+// Mathematical Description
+// ========================
+//
+// The computation of FSIN and FCOS is best handled in one piece of
+// code. The main reason is that given any argument Arg, computation
+// of trigonometric functions first calculate N and an approximation
+// to alpha where
+//
+// Arg = N pi/2 + alpha, |alpha| <= pi/4.
+//
+// Since
+//
+// cos( Arg ) = sin( (N+1) pi/2 + alpha ),
+//
+// therefore, the code for computing sine will produce cosine as long
+// as 1 is added to N immediately after the argument reduction
+// process.
+//
+// Let M = N if sine
+// N+1 if cosine.
+//
+// Now, given
+//
+// Arg = M pi/2 + alpha, |alpha| <= pi/4,
+//
+// let I = M mod 4, or I be the two lsb of M when M is represented
+// as 2's complement. I = [i_0 i_1]. Then
+//
+// sin( Arg ) = (-1)^i_0 sin( alpha ) if i_1 = 0,
+// = (-1)^i_0 cos( alpha ) if i_1 = 1.
+//
+// For example:
+// if M = -1, I = 11
+// sin (-pi/2 + alpha) = (-1) cos (alpha)
+// if M = 0, I = 00
+// sin (alpha) = sin (alpha)
+// if M = 1, I = 01
+// sin (pi/2 + alpha) = cos (alpha)
+// if M = 2, I = 10
+// sin (pi + alpha) = (-1) sin (alpha)
+// if M = 3, I = 11
+// sin ((3/2)pi + alpha) = (-1) cos (alpha)
+//
+// The value of alpha is obtained by argument reduction and
+// represented by two working precision numbers r and c where
+//
+// alpha = r + c accurately.
+//
+// The reduction method is described in a previous write up.
+// The argument reduction scheme identifies 4 cases. For Cases 2
+// and 4, because |alpha| is small, sin(r+c) and cos(r+c) can be
+// computed very easily by 2 or 3 terms of the Taylor series
+// expansion as follows:
+//
+// Case 2:
+// -------
+//
+// sin(r + c) = r + c - r^3/6 accurately
+// cos(r + c) = 1 - 2^(-67) accurately
+//
+// Case 4:
+// -------
+//
+// sin(r + c) = r + c - r^3/6 + r^5/120 accurately
+// cos(r + c) = 1 - r^2/2 + r^4/24 accurately
+//
+// The only cases left are Cases 1 and 3 of the argument reduction
+// procedure. These two cases will be merged since after the
+// argument is reduced in either cases, we have the reduced argument
+// represented as r + c and that the magnitude |r + c| is not small
+// enough to allow the usage of a very short approximation.
+//
+// The required calculation is either
+//
+// sin(r + c) = sin(r) + correction, or
+// cos(r + c) = cos(r) + correction.
+//
+// Specifically,
+//
+// sin(r + c) = sin(r) + c sin'(r) + O(c^2)
+// = sin(r) + c cos (r) + O(c^2)
+// = sin(r) + c(1 - r^2/2) accurately.
+// Similarly,
+//
+// cos(r + c) = cos(r) - c sin(r) + O(c^2)
+// = cos(r) - c(r - r^3/6) accurately.
+//
+// We therefore concentrate on accurately calculating sin(r) and
+// cos(r) for a working-precision number r, |r| <= pi/4 to within
+// 0.1% or so.
+//
+// The greatest challenge of this task is that the second terms of
+// the Taylor series
+//
+// r - r^3/3! + r^5/5! - ...
+//
+// and
+//
+// 1 - r^2/2! + r^4/4! - ...
+//
+// are not very small when |r| is close to pi/4 and the rounding
+// errors will be a concern if simple polynomial accumulation is
+// used. When |r| < 2^-3, however, the second terms will be small
+// enough (6 bits or so of right shift) that a normal Horner
+// recurrence suffices. Hence there are two cases that we consider
+// in the accurate computation of sin(r) and cos(r), |r| <= pi/4.
+//
+// Case small_r: |r| < 2^(-3)
+// --------------------------
+//
+// Since Arg = M pi/2 + r + c accurately, and M mod 4 is [i_0 i_1],
+// we have
+//
+// sin(Arg) = (-1)^i_0 * sin(r + c) if i_1 = 0
+// = (-1)^i_0 * cos(r + c) if i_1 = 1
+//
+// can be accurately approximated by
+//
+// sin(Arg) = (-1)^i_0 * [sin(r) + c] if i_1 = 0
+// = (-1)^i_0 * [cos(r) - c*r] if i_1 = 1
+//
+// because |r| is small and thus the second terms in the correction
+// are unnecessary.
+//
+// Finally, sin(r) and cos(r) are approximated by polynomials of
+// moderate lengths.
+//
+// sin(r) = r + S_1 r^3 + S_2 r^5 + ... + S_5 r^11
+// cos(r) = 1 + C_1 r^2 + C_2 r^4 + ... + C_5 r^10
+//
+// We can make use of predicates to selectively calculate
+// sin(r) or cos(r) based on i_1.
+//
+// Case normal_r: 2^(-3) <= |r| <= pi/4
+// ------------------------------------
+//
+// This case is more likely than the previous one if one considers
+// r to be uniformly distributed in [-pi/4 pi/4]. Again,
+//
+// sin(Arg) = (-1)^i_0 * sin(r + c) if i_1 = 0
+// = (-1)^i_0 * cos(r + c) if i_1 = 1.
+//
+// Because |r| is now larger, we need one extra term in the
+// correction. sin(Arg) can be accurately approximated by
+//
+// sin(Arg) = (-1)^i_0 * [sin(r) + c(1-r^2/2)] if i_1 = 0
+// = (-1)^i_0 * [cos(r) - c*r*(1 - r^2/6)] i_1 = 1.
+//
+// Finally, sin(r) and cos(r) are approximated by polynomials of
+// moderate lengths.
+//
+// sin(r) = r + PP_1_hi r^3 + PP_1_lo r^3 +
+// PP_2 r^5 + ... + PP_8 r^17
+//
+// cos(r) = 1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
+//
+// where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
+// The crux in accurate computation is to calculate
+//
+// r + PP_1_hi r^3 or 1 + QQ_1 r^2
+//
+// accurately as two pieces: U_hi and U_lo. The way to achieve this
+// is to obtain r_hi as a 10 sig. bit number that approximates r to
+// roughly 8 bits or so of accuracy. (One convenient way is
+//
+// r_hi := frcpa( frcpa( r ) ).)
+//
+// This way,
+//
+// r + PP_1_hi r^3 = r + PP_1_hi r_hi^3 +
+// PP_1_hi (r^3 - r_hi^3)
+// = [r + PP_1_hi r_hi^3] +
+// [PP_1_hi (r - r_hi)
+// (r^2 + r_hi r + r_hi^2) ]
+// = U_hi + U_lo
+//
+// Since r_hi is only 10 bit long and PP_1_hi is only 16 bit long,
+// PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
+// exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
+// and that there is no more than 8 bit shift off between r and
+// PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
+// calculated without any error. Finally, the fact that
+//
+// |U_lo| <= 2^(-8) |U_hi|
+//
+// says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
+// 8 extra bits of accuracy.
+//
+// Similarly,
+//
+// 1 + QQ_1 r^2 = [1 + QQ_1 r_hi^2] +
+// [QQ_1 (r - r_hi)(r + r_hi)]
+// = U_hi + U_lo.
+//
+// Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
+//
+// If i_1 = 0, then
+//
+// U_hi := r + PP_1_hi * r_hi^3
+// U_lo := PP_1_hi * (r - r_hi) * (r^2 + r*r_hi + r_hi^2)
+// poly := PP_1_lo r^3 + PP_2 r^5 + ... + PP_8 r^17
+// correction := c * ( 1 + C_1 r^2 )
+//
+// Else ...i_1 = 1
+//
+// U_hi := 1 + QQ_1 * r_hi * r_hi
+// U_lo := QQ_1 * (r - r_hi) * (r + r_hi)
+// poly := QQ_2 * r^4 + QQ_3 * r^6 + ... + QQ_8 r^16
+// correction := -c * r * (1 + S_1 * r^2)
+//
+// End
+//
+// Finally,
+//
+// V := poly + ( U_lo + correction )
+//
+// / U_hi + V if i_0 = 0
+// result := |
+// \ (-U_hi) - V if i_0 = 1
+//
+// It is important that in the last step, negation of U_hi is
+// performed prior to the subtraction which is to be performed in
+// the user-set rounding mode.
+//
+//
+// Algorithmic Description
+// =======================
+//
+// The argument reduction algorithm is tightly integrated into FSIN
+// and FCOS which share the same code. The following is complete and
+// self-contained. The argument reduction description given
+// previously is repeated below.
+//
+//
+// Step 0. Initialization.
+//
+// If FSIN is invoked, set N_inc := 0; else if FCOS is invoked,
+// set N_inc := 1.
+//
+// Step 1. Check for exceptional and special cases.
+//
+// * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
+// handling.
+// * If |Arg| < 2^24, go to Step 2 for reduction of moderate
+// arguments. This is the most likely case.
+// * If |Arg| < 2^63, go to Step 8 for pre-reduction of large
+// arguments.
+// * If |Arg| >= 2^63, go to Step 10 for special handling.
+//
+// Step 2. Reduction of moderate arguments.
+//
+// If |Arg| < pi/4 ...quick branch
+// N_fix := N_inc (integer)
+// r := Arg
+// c := 0.0
+// Branch to Step 4, Case_1_complete
+// Else ...cf. argument reduction
+// N := Arg * two_by_PI (fp)
+// N_fix := fcvt.fx( N ) (int)
+// N := fcvt.xf( N_fix )
+// N_fix := N_fix + N_inc
+// s := Arg - N * P_1 (first piece of pi/2)
+// w := -N * P_2 (second piece of pi/2)
+//
+// If |s| >= 2^(-33)
+// go to Step 3, Case_1_reduce
+// Else
+// go to Step 7, Case_2_reduce
+// Endif
+// Endif
+//
+// Step 3. Case_1_reduce.
+//
+// r := s + w
+// c := (s - r) + w ...observe order
+//
+// Step 4. Case_1_complete
+//
+// ...At this point, the reduced argument alpha is
+// ...accurately represented as r + c.
+// If |r| < 2^(-3), go to Step 6, small_r.
+//
+// Step 5. Normal_r.
+//
+// Let [i_0 i_1] be the 2 lsb of N_fix.
+// FR_rsq := r * r
+// r_hi := frcpa( frcpa( r ) )
+// r_lo := r - r_hi
+//
+// If i_1 = 0, then
+// poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))
+// U_hi := r + PP_1_hi*r_hi*r_hi*r_hi ...any order
+// U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)
+// correction := c + c*C_1*FR_rsq ...any order
+// Else
+// poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))
+// U_hi := 1 + QQ_1 * r_hi * r_hi ...any order
+// U_lo := QQ_1 * r_lo * (r + r_hi)
+// correction := -c*(r + S_1*FR_rsq*r) ...any order
+// Endif
+//
+// V := poly + (U_lo + correction) ...observe order
+//
+// result := (i_0 == 0? 1.0 : -1.0)
+//
+// Last instruction in user-set rounding mode
+//
+// result := (i_0 == 0? result*U_hi + V :
+// result*U_hi - V)
+//
+// Return
+//
+// Step 6. Small_r.
+//
+// ...Use flush to zero mode without causing exception
+// Let [i_0 i_1] be the two lsb of N_fix.
+//
+// FR_rsq := r * r
+//
+// If i_1 = 0 then
+// z := FR_rsq*FR_rsq; z := FR_rsq*z *r
+// poly_lo := S_3 + FR_rsq*(S_4 + FR_rsq*S_5)
+// poly_hi := r*FR_rsq*(S_1 + FR_rsq*S_2)
+// correction := c
+// result := r
+// Else
+// z := FR_rsq*FR_rsq; z := FR_rsq*z
+// poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)
+// poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
+// correction := -c*r
+// result := 1
+// Endif
+//
+// poly := poly_hi + (z * poly_lo + correction)
+//
+// If i_0 = 1, result := -result
+//
+// Last operation. Perform in user-set rounding mode
+//
+// result := (i_0 == 0? result + poly :
+// result - poly )
+// Return
+//
+// Step 7. Case_2_reduce.
+//
+// ...Refer to the write up for argument reduction for
+// ...rationale. The reduction algorithm below is taken from
+// ...argument reduction description and integrated this.
+//
+// w := N*P_3
+// U_1 := N*P_2 + w ...FMA
+// U_2 := (N*P_2 - U_1) + w ...2 FMA
+// ...U_1 + U_2 is N*(P_2+P_3) accurately
+//
+// r := s - U_1
+// c := ( (s - r) - U_1 ) - U_2
+//
+// ...The mathematical sum r + c approximates the reduced
+// ...argument accurately. Note that although compared to
+// ...Case 1, this case requires much more work to reduce
+// ...the argument, the subsequent calculation needed for
+// ...any of the trigonometric function is very little because
+// ...|alpha| < 1.01*2^(-33) and thus two terms of the
+// ...Taylor series expansion suffices.
+//
+// If i_1 = 0 then
+// poly := c + S_1 * r * r * r ...any order
+// result := r
+// Else
+// poly := -2^(-67)
+// result := 1.0
+// Endif
+//
+// If i_0 = 1, result := -result
+//
+// Last operation. Perform in user-set rounding mode
+//
+// result := (i_0 == 0? result + poly :
+// result - poly )
+//
+// Return
+//
+//
+// Step 8. Pre-reduction of large arguments.
+//
+// ...Again, the following reduction procedure was described
+// ...in the separate write up for argument reduction, which
+// ...is tightly integrated here.
+
+// N_0 := Arg * Inv_P_0
+// N_0_fix := fcvt.fx( N_0 )
+// N_0 := fcvt.xf( N_0_fix)
+
+// Arg' := Arg - N_0 * P_0
+// w := N_0 * d_1
+// N := Arg' * two_by_PI
+// N_fix := fcvt.fx( N )
+// N := fcvt.xf( N_fix )
+// N_fix := N_fix + N_inc
+//
+// s := Arg' - N * P_1
+// w := w - N * P_2
+//
+// If |s| >= 2^(-14)
+// go to Step 3
+// Else
+// go to Step 9
+// Endif
+//
+// Step 9. Case_4_reduce.
+//
+// ...first obtain N_0*d_1 and -N*P_2 accurately
+// U_hi := N_0 * d_1 V_hi := -N*P_2
+// U_lo := N_0 * d_1 - U_hi V_lo := -N*P_2 - V_hi ...FMAs
+//
+// ...compute the contribution from N_0*d_2 and -N*P_3
+// w := -N*P_3
+// w := w + N_0*d_2
+// t := U_lo + V_lo + w ...any order
+//
+// ...at this point, the mathematical value
+// ...s + U_hi + V_hi + t approximates the true reduced argument
+// ...accurately. Just need to compute this accurately.
+//
+// ...Calculate U_hi + V_hi accurately:
+// A := U_hi + V_hi
+// if |U_hi| >= |V_hi| then
+// a := (U_hi - A) + V_hi
+// else
+// a := (V_hi - A) + U_hi
+// endif
+// ...order in computing "a" must be observed. This branch is
+// ...best implemented by predicates.
+// ...A + a is U_hi + V_hi accurately. Moreover, "a" is
+// ...much smaller than A: |a| <= (1/2)ulp(A).
+//
+// ...Just need to calculate s + A + a + t
+// C_hi := s + A t := t + a
+// C_lo := (s - C_hi) + A
+// C_lo := C_lo + t
+//
+// ...Final steps for reduction
+// r := C_hi + C_lo
+// c := (C_hi - r) + C_lo
+//
+// ...At this point, we have r and c
+// ...And all we need is a couple of terms of the corresponding
+// ...Taylor series.
+//
+// If i_1 = 0
+// poly := c + r*FR_rsq*(S_1 + FR_rsq*S_2)
+// result := r
+// Else
+// poly := FR_rsq*(C_1 + FR_rsq*C_2)
+// result := 1
+// Endif
+//
+// If i_0 = 1, result := -result
+//
+// Last operation. Perform in user-set rounding mode
+//
+// result := (i_0 == 0? result + poly :
+// result - poly )
+// Return
+//
+// Large Arguments: For arguments above 2**63, a Payne-Hanek
+// style argument reduction is used and pi_by_2 reduce is called.
+//
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 64
+// Constants for the large-argument sin/cos code. Rows are 16 bytes: an extended value (low significand word, high significand word, sign/exponent, pad) read with ldfe, or packed single-precision words read with ldfs.
+FSINCOS_CONSTANTS:
+ASM_TYPE_DIRECTIVE(FSINCOS_CONSTANTS,@object)
+data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24
+data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2
+data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0
+data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1
+data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2
+data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3
+data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63 (byte offset 96, where GR_Table_Base1 starts)
+data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0
+data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1
+data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2
+data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4
+data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4
+data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3
+data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67
+data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8
+data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7
+data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6
+data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1 (same words as the C_1 row further down)
+data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi
+data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4
+data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3
+data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2
+data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lo
+data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8
+data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7
+data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6
+data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1 (same words as the S_1 row further down)
+data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1
+data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4
+data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3
+data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1
+data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2
+data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3
+data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4
+data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1
+data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2
+data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3
+data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4
+data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5
+data4 0x38800000, 0xB8800000, 0x00000000 // two**-14, -two**-14 (final row has three words only)
+ASM_SIZE_DIRECTIVE(FSINCOS_CONSTANTS)
+
+FR_Input_X = f8 // input and return value register
+FR_Neg_Two_to_M3 = f32 // f32 is also FR_Two_to_63
+FR_Two_to_63 = f32
+FR_Two_to_24 = f33 // f33 is also FR_Pi_by_4
+FR_Pi_by_4 = f33
+FR_Two_to_M14 = f34
+FR_Two_to_M33 = f35
+FR_Neg_Two_to_24 = f36 // f36 is also FR_Neg_Pi_by_4
+FR_Neg_Pi_by_4 = f36
+FR_Neg_Two_to_M14 = f37
+FR_Neg_Two_to_M33 = f38
+FR_Neg_Two_to_M67 = f39
+FR_Inv_pi_by_2 = f40
+FR_N_float = f41
+FR_N_fix = f42
+FR_P_1 = f43
+FR_P_2 = f44
+FR_P_3 = f45
+FR_s = f46
+FR_w = f47
+FR_c = f48
+FR_r = f49
+FR_Z = f50
+FR_A = f51
+FR_a = f52
+FR_t = f53
+FR_U_1 = f54
+FR_U_2 = f55
+FR_C_1 = f56
+FR_C_2 = f57
+FR_C_3 = f58
+FR_C_4 = f59
+FR_C_5 = f60
+FR_S_1 = f61
+FR_S_2 = f62
+FR_S_3 = f63
+FR_S_4 = f64
+FR_S_5 = f65
+FR_poly_hi = f66
+FR_poly_lo = f67
+FR_r_hi = f68
+FR_r_lo = f69
+FR_rsq = f70
+FR_r_cubed = f71
+FR_C_hi = f72
+FR_N_0 = f73
+FR_d_1 = f74
+FR_V = f75 // f75 is also FR_V_hi
+FR_V_hi = f75
+FR_V_lo = f76
+FR_U_hi = f77
+FR_U_lo = f78
+FR_U_hiabs = f79
+FR_V_hiabs = f80
+FR_PP_8 = f81 // f81-f87: each PP_n shares its register with QQ_n
+FR_QQ_8 = f81
+FR_PP_7 = f82
+FR_QQ_7 = f82
+FR_PP_6 = f83
+FR_QQ_6 = f83
+FR_PP_5 = f84
+FR_QQ_5 = f84
+FR_PP_4 = f85
+FR_QQ_4 = f85
+FR_PP_3 = f86
+FR_QQ_3 = f86
+FR_PP_2 = f87
+FR_QQ_2 = f87
+FR_QQ_1 = f88
+FR_N_0_fix = f89
+FR_Inv_P_0 = f90
+FR_corr = f91
+FR_poly = f92
+FR_d_2 = f93
+FR_Two_to_M3 = f94 // f94 is also FR_Neg_Two_to_63
+FR_Neg_Two_to_63 = f94
+FR_P_0 = f95
+FR_C_lo = f96
+FR_PP_1 = f97
+FR_PP_1_lo = f98
+FR_ArgPrime = f99
+// General registers for the large-argument routines.
+GR_Table_Base = r32
+GR_Table_Base1 = r33
+GR_i_0 = r34
+GR_i_1 = r35
+GR_N_Inc = r36
+GR_Sin_or_Cos = r37
+// Save slots for b0, gp and ar.pfs; the __libm_callout_1s/1c stubs use these names.
+GR_SAVE_B0 = r39
+GR_SAVE_GP = r40
+GR_SAVE_PFS = r41
+
+.section .text
+.proc __libm_sin_double_dbx#
+.align 64
+__libm_sin_double_dbx:
+// Sine entry of the large-argument code: set GR_Sin_or_Cos = 0, load the FSINCOS_CONSTANTS address, then branch to L(SINCOS_CONTINUE).
+{ .mlx
+alloc GR_Table_Base = ar.pfs,0,12,2,0 // the value alloc writes to GR_Table_Base is overwritten by the addl below
+ movl GR_Sin_or_Cos = 0x0 ;; // the cosine entry uses 0x1 here
+}
+
+{ .mmi
+ nop.m 999
+ addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base] // fetch the table address through the slot computed above
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mib
+ nop.m 999
+ nop.i 999
+ br.cond.sptk L(SINCOS_CONTINUE) ;;
+}
+
+.endp __libm_sin_double_dbx#
+ASM_SIZE_DIRECTIVE(__libm_sin_double_dbx)
+
+.section .text
+.proc __libm_cos_double_dbx#
+__libm_cos_double_dbx:
+
+{ .mlx
+alloc GR_Table_Base= ar.pfs,0,12,2,0
+ movl GR_Sin_or_Cos = 0x1 ;;
+}
+
+{ .mmi
+ nop.m 999
+ addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+//
+// Load Table Address
+//
+L(SINCOS_CONTINUE):
+
+{ .mmi
+ add GR_Table_Base1 = 96, GR_Table_Base
+ ldfs FR_Two_to_24 = [GR_Table_Base], 4
+ nop.i 999
+}
+;;
+
+{ .mmi
+ nop.m 999
+//
+// Load 2**24, load 2**63.
+//
+ ldfs FR_Neg_Two_to_24 = [GR_Table_Base], 12
+ mov r41 = ar.pfs ;;
+}
+
+{ .mfi
+ ldfs FR_Two_to_63 = [GR_Table_Base1], 4
+//
+// Check for unnormals - unsupported operands. We do not want
+// to generate denormal exception
+// Check for NatVals, QNaNs, SNaNs, +/-Infs
+// Check for EM unsupporteds
+// Check for Zero
+//
+ fclass.m.unc p6, p8 = FR_Input_X, 0x1E3
+ mov r40 = gp ;;
+}
+
+{ .mfi
+ nop.m 999
+ fclass.nm.unc p8, p0 = FR_Input_X, 0x1FF
+// GR_Sin_or_Cos denotes
+ mov r39 = b0
+}
+
+{ .mfb
+ ldfs FR_Neg_Two_to_63 = [GR_Table_Base1], 12
+ fclass.m.unc p10, p0 = FR_Input_X, 0x007
+(p6) br.cond.spnt L(SINCOS_SPECIAL) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(SINCOS_SPECIAL) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Branch if +/- NaN, Inf.
+// Load -2**24, load -2**63.
+//
+(p10) br.cond.spnt L(SINCOS_ZERO) ;;
+}
+
+{ .mmb
+ ldfe FR_Inv_pi_by_2 = [GR_Table_Base], 16
+ ldfe FR_Inv_P_0 = [GR_Table_Base1], 16
+ nop.b 999 ;;
+}
+
+{ .mmb
+ nop.m 999
+ ldfe FR_d_1 = [GR_Table_Base1], 16
+ nop.b 999 ;;
+}
+//
+// Raise possible denormal operand flag with useful fcmp
+// Is x <= -2**63
+// Load Inv_P_0 for pre-reduction
+// Load Inv_pi_by_2
+//
+
+{ .mmb
+ ldfe FR_P_0 = [GR_Table_Base], 16
+ ldfe FR_d_2 = [GR_Table_Base1], 16
+ nop.b 999 ;;
+}
+//
+// Load P_0
+// Load d_1
+// Is x >= 2**63
+// Is x <= -2**24?
+//
+
+{ .mmi
+ ldfe FR_P_1 = [GR_Table_Base], 16 ;;
+//
+// Load P_1
+// Load d_2
+// Is x >= 2**24?
+//
+ ldfe FR_P_2 = [GR_Table_Base], 16
+ nop.i 999 ;;
+}
+
+{ .mmf
+ nop.m 999
+ ldfe FR_P_3 = [GR_Table_Base], 16
+ fcmp.le.unc.s1 p7, p8 = FR_Input_X, FR_Neg_Two_to_24
+}
+
+{ .mfi
+ nop.m 999
+//
+// Branch if +/- zero.
+// Decide about the paths to take:
+// If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2
+// OTHERWISE - CASE 3 OR 4
+//
+ fcmp.le.unc.s0 p10, p11 = FR_Input_X, FR_Neg_Two_to_63
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
+ nop.i 999
+}
+
+{ .mfi
+ ldfe FR_Pi_by_4 = [GR_Table_Base1], 16
+(p11) fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63
+ nop.i 999 ;;
+}
+
+{ .mmi
+ ldfe FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
+ ldfs FR_Two_to_M3 = [GR_Table_Base1], 4
+ nop.i 999 ;;
+}
+
+{ .mib
+ ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
+ nop.i 999
+//
+// Load P_2
+// Load P_3
+// Load pi_by_4
+// Load neg_pi_by_4
+// Load 2**(-3)
+// Load -2**(-3).
+//
+(p10) br.cond.spnt L(SINCOS_ARG_TOO_LARGE) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Branch out if x >= 2**63. Use Payne-Hanek Reduction
+//
+(p7) br.cond.spnt L(SINCOS_LARGER_ARG) ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
+//
+ fma.s1 FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Select the case when |Arg| < pi/4
+// Else Select the case when |Arg| >= pi/4
+//
+ fcvt.fx.s1 FR_N_fix = FR_N_float
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N = Arg * 2/pi
+// Check if Arg < pi/4
+//
+(p6) fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
+ nop.i 999 ;;
+}
+//
+// Case 2: Convert integer N_fix back to normalized floating-point value.
+// Case 1: p8 is only affected when p6 is set
+//
+
+{ .mfi
+(p7) ldfs FR_Two_to_M33 = [GR_Table_Base1], 4
+//
+// Grab the integer part of N and call it N_fix
+//
+(p6) fmerge.se FR_r = FR_Input_X, FR_Input_X
+// If |x| < pi/4, r = x and c = 0
+// If |x| < pi/4, is x < 2**(-3).
+// r = Arg
+// c = 0
+(p6) mov GR_N_Inc = GR_Sin_or_Cos ;;
+}
+
+{ .mmf
+ nop.m 999
+(p7) ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
+(p6) fmerge.se FR_c = f0, f0
+}
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// If |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
+// If |x| >= pi/4,
+// Create the right N for |x| < pi/4 and otherwise
+// Case 2: Place integer part of N in GP register
+//
+(p7) fcvt.xf FR_N_float = FR_N_fix
+ nop.i 999 ;;
+}
+
+{ .mmf
+ nop.m 999
+(p7) getf.sig GR_N_Inc = FR_N_fix
+(p8) fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Load 2**(-33), -2**(-33)
+//
+(p8) br.cond.spnt L(SINCOS_SMALL_R) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.sptk L(SINCOS_NORMAL_R) ;;
+}
+//
+// if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
+//
+//
+// In this branch, |x| >= pi/4.
+//
+
+{ .mfi
+ ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
+//
+// Load -2**(-67)
+//
+ fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X
+//
+// w = N * P_2
+// s = -N * P_1 + Arg
+//
+ add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_w = FR_N_float, FR_P_2, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Adjust N_fix by N_inc to determine whether sine or
+// cosine is being calculated
+//
+ fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+// Remember x >= pi/4.
+// Is s <= -2**(-33) or s >= 2**(-33) (p6)
+// or -2**(-33) < s < 2**(-33) (p7)
+(p6) fms.s1 FR_r = FR_s, f1, FR_w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_w = FR_N_float, FR_P_3, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p6) fms.s1 FR_c = FR_s, f1, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For big s: r = s - w: No further reduction is necessary
+// For small s: w = N * P_3 (change sign) More reduction
+//
+(p6) fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fms.s1 FR_r = FR_s, f1, FR_U_1
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// For big s: Is |r| < 2**(-3)?
+// For big s: c = S - r
+// For small s: U_1 = N * P_2 + w
+//
+// If p8 is set, prepare to branch to Small_R.
+// If p9 is set, prepare to branch to Normal_R.
+// For big s, r is complete here.
+//
+(p6) fms.s1 FR_c = FR_c, f1, FR_w
+//
+// For big s: c = c + w (w has not been negated.)
+// For small s: r = S - U_1
+//
+(p8) br.cond.spnt L(SINCOS_SMALL_R) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.sptk L(SINCOS_NORMAL_R) ;;
+}
+
+{ .mfi
+(p7) add GR_Table_Base1 = 224, GR_Table_Base1
+//
+// Branch to SINCOS_SMALL_R or SINCOS_NORMAL_R
+//
+(p7) fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
+//
+// c = S - U_1
+// r = S_1 * r
+//
+//
+(p7) extr.u GR_i_1 = GR_N_Inc, 0, 1
+}
+
+{ .mmi
+ nop.m 999 ;;
+//
+// Get [i_0,i_1] - two lsb of N_fix_gr.
+// Do dummy fmpy so inexact is always set.
+//
+(p7) cmp.eq.unc p9, p10 = 0x0, GR_i_1
+(p7) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+//
+// For small s: U_2 = N * P_2 - U_1
+// S_1 stored constant - grab the one stored with the
+// coefficients.
+//
+
+{ .mfi
+(p7) ldfe FR_S_1 = [GR_Table_Base1], 16
+//
+// Check if i_1 and i_0 != 0
+//
+(p10) fma.s1 FR_poly = f0, f1, FR_Neg_Two_to_M67
+(p7) cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fms.s1 FR_s = FR_s, f1, FR_r
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// S = S - r
+// U_2 = U_2 + w
+// load S_1
+//
+(p7) fma.s1 FR_rsq = FR_r, FR_r, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_U_2 = FR_U_2, f1, FR_w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fmerge.se FR_Input_X = FR_r, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_Input_X = f0, f1, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// FR_rsq = r * r
+// Save r as the result.
+//
+(p7) fms.s1 FR_c = FR_s, f1, FR_U_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if ( i_1 ==0) poly = c + S_1*r*r*r
+// else Result = 1
+//
+(p12) fnma.s1 FR_Input_X = FR_Input_X, f1, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_r = FR_S_1, FR_r, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.d.s0 FR_S_1 = FR_S_1, FR_S_1, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// If i_1 != 0, poly = -2**(-67)
+//
+(p7) fms.s1 FR_c = FR_c, f1, FR_U_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// c = c - U_2
+//
+(p9) fma.s1 FR_poly = FR_r, FR_rsq, FR_c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// i_0 != 0, so Result = -Result
+//
+(p11) fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+//
+// if (i_0 == 0), Result = Result + poly
+// else Result = Result - poly
+//
+ br.ret.sptk b0 ;;
+}
+L(SINCOS_LARGER_ARG):
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
+ nop.i 999
+}
+;;
+
+// This path is for arguments > 2**24
+// Adjust table_ptr1 to beginning of table.
+//
+
+{ .mmi
+ nop.m 999
+ addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Point to 2**(-14)
+// N_0 = Arg * Inv_P_0
+//
+
+{ .mmi
+ add GR_Table_Base = 688, GR_Table_Base ;;
+ ldfs FR_Two_to_M14 = [GR_Table_Base], 4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load values 2**(-14) and -2**(-14)
+//
+ fcvt.fx.s1 FR_N_0_fix = FR_N_0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N_0_fix = integer part of N_0
+//
+ fcvt.xf FR_N_0 = FR_N_0_fix
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Make N_0 the integer part
+//
+ fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_w = FR_N_0, FR_d_1, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Arg' = -N_0 * P_0 + Arg
+// w = N_0 * d_1
+//
+ fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N = A' * 2/pi
+//
+ fcvt.fx.s1 FR_N_fix = FR_N_float
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N_fix is the integer part
+//
+ fcvt.xf FR_N_float = FR_N_fix
+ nop.i 999 ;;
+}
+
+{ .mfi
+ getf.sig GR_N_Inc = FR_N_fix
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+ add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N is the integer part of the reduced-reduced argument.
+// Put the integer in a GP register
+//
+ fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// s = -N*P_1 + Arg'
+// w = -N*P_2 + w
+// N_fix_gr = N_fix_gr + N_inc
+//
+ fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For |s| > 2**(-14) r = S + w (r complete)
+// Else U_hi = N_0 * d_1
+//
+(p9) fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Either S <= -2**(-14) or S >= 2**(-14)
+// or -2**(-14) < s < 2**(-14)
+//
+(p8) fma.s1 FR_r = FR_s, f1, FR_w
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_w = FR_N_float, FR_P_3, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// We need abs of both U_hi and V_hi - don't
+// worry about switched sign of V_hi.
+//
+(p9) fms.s1 FR_A = FR_U_hi, f1, FR_V_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Big s: finish up c = (S - r) + w (c complete)
+// Case 4: A = U_hi + V_hi
+// Note: Worry about switched sign of V_hi, so subtract instead of add.
+//
+(p9) fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fmerge.s FR_V_hiabs = f0, FR_V_hi
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+// For big s: c = S - r
+// For small s do more work: U_lo = N_0 * d_1 - U_hi
+//
+(p9) fmerge.s FR_U_hiabs = f0, FR_U_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For big s: Is |r| < 2**(-3)
+// For big s: if p12 set, prepare to branch to Small_R.
+// For big s: If p13 set, prepare to branch to Normal_R.
+//
+(p8) fms.s1 FR_c = FR_s, f1, FR_r
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// For small S: V_hi = N * P_2
+// w = N * P_3
+// Note the product does not include the (-) as in the writeup
+// so (-) missing for V_hi and w.
+//
+(p8) fcmp.lt.unc.s1 p12, p13 = FR_r, FR_Two_to_M3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fcmp.gt.s1 p12, p13 = FR_r, FR_Neg_Two_to_M3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_c = FR_c, f1, FR_w
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p9) fms.s1 FR_w = FR_N_0, FR_d_2, FR_w
+(p12) br.cond.spnt L(SINCOS_SMALL_R) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.sptk L(SINCOS_NORMAL_R) ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Big s: Vector off when |r| < 2**(-3). Recall that p8 will be true.
+// The remaining stuff is for Case 4.
+// Small s: V_lo = N * P_2 + U_hi (U_hi is in place of V_hi in writeup)
+// Note: the (-) is still missing for V_lo.
+// Small s: w = w + N_0 * d_2
+// Note: the (-) is now incorporated in w.
+//
+(p9) fcmp.ge.unc.s1 p10, p11 = FR_U_hiabs, FR_V_hiabs
+ extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// C_hi = S + A
+//
+(p9) fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
+ extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// t = U_lo + V_lo
+//
+//
+(p10) fms.s1 FR_a = FR_U_hi, f1, FR_A
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fma.s1 FR_a = FR_V_hi, f1, FR_A
+ nop.i 999
+}
+;;
+
+{ .mmi
+ nop.m 999
+ addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ add GR_Table_Base = 528, GR_Table_Base
+//
+// Is U_hiabs >= V_hiabs?
+//
+(p9) fma.s1 FR_C_hi = FR_s, f1, FR_A
+ nop.i 999 ;;
+}
+
+{ .mmi
+ ldfe FR_C_1 = [GR_Table_Base], 16 ;;
+ ldfe FR_C_2 = [GR_Table_Base], 64
+ nop.i 999 ;;
+}
+
+{ .mmf
+ nop.m 999
+//
+// c = c + C_lo finished.
+// Load C_2
+//
+ ldfe FR_S_1 = [GR_Table_Base], 16
+//
+// C_lo = S - C_hi
+//
+ fma.s1 FR_t = FR_t, f1, FR_w ;;
+}
+//
+// r and c have been computed.
+// Make sure ftz mode is set - should be automatic when using wre
+// |r| < 2**(-3)
+// Get [i_0,i_1] - two lsb of N_fix.
+// Load S_1
+//
+
+{ .mfi
+ ldfe FR_S_2 = [GR_Table_Base], 64
+//
+// t = t + w
+//
+(p10) fms.s1 FR_a = FR_a, f1, FR_V_hi
+ cmp.eq.unc p9, p10 = 0x0, GR_i_0
+}
+
+{ .mfi
+ nop.m 999
+//
+// For larger u than v: a = U_hi - A
+// Else a = V_hi - A (do an add to account for missing (-) on V_hi
+//
+ fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fms.s1 FR_a = FR_U_hi, f1, FR_a
+ cmp.eq.unc p11, p12 = 0x0, GR_i_1
+}
+
+{ .mfi
+ nop.m 999
+//
+// If u > v: a = (U_hi - A) + V_hi
+// Else a = (V_hi - A) + U_hi
+// In each case account for negative missing from V_hi.
+//
+ fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// C_lo = (S - C_hi) + A
+//
+ fma.s1 FR_t = FR_t, f1, FR_a
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// t = t + a
+//
+ fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// C_lo = C_lo + t
+// Adjust Table_Base to beginning of table
+//
+ fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load S_2
+//
+ fma.s1 FR_rsq = FR_r, FR_r, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Table_Base points to C_1
+// r = C_hi + C_lo
+//
+ fms.s1 FR_c = FR_C_hi, f1, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if i_1 ==0: poly = S_2 * FR_rsq + S_1
+// else poly = C_2 * FR_rsq + C_1
+//
+(p11) fma.s1 FR_Input_X = f0, f1, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 FR_Input_X = f0, f1, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Compute r_cube = FR_rsq * r
+//
+(p11) fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Compute FR_rsq = r * r
+// Is i_1 == 0 ?
+//
+ fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// c = C_hi - r
+// Load C_1
+//
+ fma.s1 FR_c = FR_c, f1, FR_C_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// if i_1 ==0: poly = r_cube * poly + c
+// else poly = FR_rsq * poly
+//
+(p10) fms.s1 FR_Input_X = f0, f1, FR_Input_X
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if i_1 ==0: Result = r
+// else Result = 1.0
+//
+(p11) fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if i_0 !=0: Result = -Result
+//
+(p9) fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p10) fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+//
+// if i_0 == 0: Result = Result + poly
+// else Result = Result - poly
+//
+ br.ret.sptk b0 ;;
+}
+L(SINCOS_SMALL_R):
+
+{ .mii
+ nop.m 999
+ extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+//
+//
+// Compare both i_1 and i_0 with 0.
+// if i_1 == 0, set p9.
+// if i_0 == 0, set p11.
+//
+ cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_rsq = FR_r, FR_r, f0
+ extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Z = Z * FR_rsq
+//
+(p10) fnma.s1 FR_c = FR_c, FR_r, f0
+ cmp.eq.unc p11, p12 = 0x0, GR_i_0
+}
+;;
+
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+// |r| < 2**(-3)
+//
+// Set table_ptr1 to beginning of constant table.
+// Get [i_0,i_1] - two lsb of N_fix_gr.
+//
+
+{ .mmi
+ nop.m 999
+ addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Set table_ptr1 to point to S_5.
+// Set table_ptr1 to point to C_5.
+// Compute FR_rsq = r * r
+//
+
+{ .mfi
+(p9) add GR_Table_Base = 672, GR_Table_Base
+(p10) fmerge.s FR_r = f1, f1
+(p10) add GR_Table_Base = 592, GR_Table_Base ;;
+}
+//
+// Set table_ptr1 to point to S_5.
+// Set table_ptr1 to point to C_5.
+//
+
+{ .mmi
+(p9) ldfe FR_S_5 = [GR_Table_Base], -16 ;;
+//
+// if (i_1 == 0) load S_5
+// if (i_1 != 0) load C_5
+//
+(p9) ldfe FR_S_4 = [GR_Table_Base], -16
+ nop.i 999 ;;
+}
+
+{ .mmf
+(p10) ldfe FR_C_5 = [GR_Table_Base], -16
+//
+// Z = FR_rsq * FR_rsq
+//
+(p9) ldfe FR_S_3 = [GR_Table_Base], -16
+//
+// Compute FR_rsq = r * r
+// if (i_1 == 0) load S_4
+// if (i_1 != 0) load C_4
+//
+ fma.s1 FR_Z = FR_rsq, FR_rsq, f0 ;;
+}
+//
+// if (i_1 == 0) load S_3
+// if (i_1 != 0) load C_3
+//
+
+{ .mmi
+(p9) ldfe FR_S_2 = [GR_Table_Base], -16 ;;
+//
+// if (i_1 == 0) load S_2
+// if (i_1 != 0) load C_2
+//
+(p9) ldfe FR_S_1 = [GR_Table_Base], -16
+ nop.i 999
+}
+
+{ .mmi
+(p10) ldfe FR_C_4 = [GR_Table_Base], -16 ;;
+(p10) ldfe FR_C_3 = [GR_Table_Base], -16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p10) ldfe FR_C_2 = [GR_Table_Base], -16 ;;
+(p10) ldfe FR_C_1 = [GR_Table_Base], -16
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 != 0):
+// poly_lo = FR_rsq * C_5 + C_4
+// poly_hi = FR_rsq * C_2 + C_1
+//
+(p9) fma.s1 FR_Z = FR_Z, FR_r, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0) load S_1
+// if (i_1 != 0) load C_1
+//
+(p9) fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// c = -c * r
+// dummy fmpy's to flag inexact.
+//
+(p9) fma.d.s0 FR_S_4 = FR_S_4, FR_S_4, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_lo = FR_rsq * poly_lo + C_3
+// poly_hi = FR_rsq * poly_hi
+//
+ fma.s1 FR_Z = FR_Z, FR_rsq, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0):
+// poly_lo = FR_rsq * S_5 + S_4
+// poly_hi = FR_rsq * S_2 + S_1
+//
+(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0):
+// Z = Z * r for only one of the small r cases - not there
+// in original implementation notes.
+//
+(p9) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.d.s0 FR_C_1 = FR_C_1, FR_C_1, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_lo = FR_rsq * poly_lo + S_3
+// poly_hi = FR_rsq * poly_hi
+//
+(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0): dummy fmpy's to flag inexact
+// r = 1
+//
+(p9) fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_hi = r * poly_hi
+//
+ fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fms.s1 FR_r = f0, f1, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_hi = Z * poly_lo + c
+// if i_0 == 1: r = -r
+//
+ fma.s1 FR_poly = FR_poly, f1, FR_poly_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fms.d.s0 FR_Input_X = FR_r, f1, FR_poly
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// poly = poly + poly_hi
+//
+(p11) fma.d.s0 FR_Input_X = FR_r, f1, FR_poly
+//
+// if (i_0 == 0) Result = r + poly
+// if (i_0 != 0) Result = r - poly
+//
+ br.ret.sptk b0 ;;
+}
+L(SINCOS_NORMAL_R):
+
+{ .mii
+ nop.m 999
+ extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+//
+// Set table_ptr1 and table_ptr2 to base address of
+// constant table.
+ cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_rsq = FR_r, FR_r, f0
+ extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+
+{ .mfi
+ nop.m 999
+ frcpa.s1 FR_r_hi, p6 = f1, FR_r
+ cmp.eq.unc p11, p12 = 0x0, GR_i_0
+}
+;;
+
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+//
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+// Get [i_0,i_1] - two lsb of N_fix_gr alone.
+//
+
+{ .mmi
+ nop.m 999
+ addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+(p10) add GR_Table_Base = 384, GR_Table_Base
+(p12) fms.s1 FR_Input_X = f0, f1, f1
+(p9) add GR_Table_Base = 224, GR_Table_Base ;;
+}
+
+{ .mmf
+ nop.m 999
+(p10) ldfe FR_QQ_8 = [GR_Table_Base], 16
+//
+// if (i_1==0) poly = poly * FR_rsq + PP_1_lo
+// else poly = FR_rsq * poly
+//
+(p11) fma.s1 FR_Input_X = f0, f1, f1 ;;
+}
+
+{ .mmf
+(p10) ldfe FR_QQ_7 = [GR_Table_Base], 16
+//
+// Adjust table pointers based on i_0
+// Compute rsq = r * r
+//
+(p9) ldfe FR_PP_8 = [GR_Table_Base], 16
+ fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 ;;
+}
+
+{ .mmf
+(p9) ldfe FR_PP_7 = [GR_Table_Base], 16
+(p10) ldfe FR_QQ_6 = [GR_Table_Base], 16
+//
+// Load PP_8 and QQ_8; PP_7 and QQ_7
+//
+ frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;
+}
+//
+// if (i_1==0) poly = PP_7 + FR_rsq * PP_8.
+// else poly = QQ_7 + FR_rsq * QQ_8.
+//
+
+{ .mmb
+(p9) ldfe FR_PP_6 = [GR_Table_Base], 16
+(p10) ldfe FR_QQ_5 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+
+{ .mmb
+(p9) ldfe FR_PP_5 = [GR_Table_Base], 16
+(p10) ldfe FR_S_1 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+
+{ .mmb
+(p10) ldfe FR_QQ_1 = [GR_Table_Base], 16
+(p9) ldfe FR_C_1 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+
+{ .mmi
+(p10) ldfe FR_QQ_4 = [GR_Table_Base], 16 ;;
+(p9) ldfe FR_PP_1 = [GR_Table_Base], 16
+ nop.i 999 ;;
+}
+
+{ .mmf
+(p10) ldfe FR_QQ_3 = [GR_Table_Base], 16
+//
+// if (i_1=0) corr = corr + c*c
+// else corr = corr * c
+//
+(p9) ldfe FR_PP_4 = [GR_Table_Base], 16
+(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 ;;
+}
+//
+// if (i_1=0) poly = rsq * poly + PP_5
+// else poly = rsq * poly + QQ_5
+// Load PP_4 or QQ_4
+//
+
+{ .mmf
+(p9) ldfe FR_PP_3 = [GR_Table_Base], 16
+(p10) ldfe FR_QQ_2 = [GR_Table_Base], 16
+//
+// r_hi = frcpa(frcpa(r)).
+// r_cube = r * FR_rsq.
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 ;;
+}
+//
+// Do dummy multiplies so inexact is always set.
+//
+
+{ .mfi
+(p9) ldfe FR_PP_2 = [GR_Table_Base], 16
+//
+// r_lo = r - r_hi
+//
+(p9) fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0
+ nop.i 999 ;;
+}
+
+{ .mmf
+ nop.m 999
+(p9) ldfe FR_PP_1_lo = [GR_Table_Base], 16
+(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) U_lo = r_hi * r_hi
+// else U_lo = r_hi + r
+//
+(p9) fma.s1 FR_corr = FR_C_1, FR_rsq, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) corr = C_1 * rsq
+// else corr = S_1 * r_cubed + r
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) U_hi = r_hi + U_hi
+// else U_hi = QQ_1 * U_hi + 1
+//
+(p9) fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// U_hi = r_hi * r_hi
+//
+ fms.s1 FR_r_lo = FR_r, f1, FR_r_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load PP_1, PP_6, PP_5, and C_1
+// Load QQ_1, QQ_6, QQ_5, and S_1
+//
+ fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) U_lo = r * r_hi + U_lo
+// else U_lo = r_lo * U_lo
+//
+(p9) fma.s1 FR_corr = FR_corr, FR_c, FR_c
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 =0) U_hi = r + U_hi
+// if (i_1 =0) U_lo = r_lo * U_lo
+//
+//
+(p9) fma.d.s0 FR_PP_5 = FR_PP_5, FR_PP_4, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) poly = poly * rsq + PP_6
+// else poly = poly * rsq + QQ_6
+//
+(p9) fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.d.s0 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1!=0) U_hi = PP_1 * U_hi
+// if (i_1!=0) U_lo = r * r + U_lo
+// Load PP_3 or QQ_3
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load PP_2, QQ_2
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) poly = FR_rsq * poly + PP_3
+// else poly = FR_rsq * poly + QQ_3
+// Load PP_1_lo
+//
+(p9) fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1 =0) poly = poly * rsq + pp_r4
+// else poly = poly * rsq + qq_r4
+//
+(p9) fma.s1 FR_U_hi = FR_r, f1, FR_U_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) U_lo = PP_1_hi * U_lo
+// else U_lo = QQ_1 * U_lo
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_0==0) Result = 1
+// else Result = -1
+//
+ fma.s1 FR_V = FR_U_lo, f1, FR_corr
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) poly = FR_rsq * poly + PP_2
+// else poly = FR_rsq * poly + QQ_2
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// V = U_lo + corr
+//
+(p9) fma.s1 FR_poly = FR_r_cubed, FR_poly, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) poly = r_cube * poly
+// else poly = FR_rsq * poly
+//
+ fma.s1 FR_V = FR_poly, f1, FR_V
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fms.d.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// V = V + poly
+//
+(p11) fma.d.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+//
+// if (i_0==0) Result = Result * U_hi + V
+// else Result = Result * U_hi - V
+//
+ br.ret.sptk b0 ;;
+}
+
+//
+// If cosine, FR_Input_X = 1
+// If sine, FR_Input_X = +/-Zero (Input FR_Input_X)
+// Results are exact, no exceptions
+//
+L(SINCOS_ZERO):
+
+{ .mmb
+ cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
+ nop.m 999
+ nop.b 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fmerge.s FR_Input_X = FR_Input_X, FR_Input_X
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p6) fmerge.s FR_Input_X = f1, f1
+ br.ret.sptk b0 ;;
+}
+
+L(SINCOS_SPECIAL):
+
+//
+// Path for Arg = +/- QNaN, SNaN, Inf
+// Invalid can be raised. SNaNs
+// become QNaNs
+//
+
+{ .mfb
+ nop.m 999
+ fmpy.d.s0 FR_Input_X = FR_Input_X, f0
+ br.ret.sptk b0 ;;
+}
+.endp __libm_cos_double_dbx#
+ASM_SIZE_DIRECTIVE(__libm_cos_double_dbx#)
+
+
+
+//
+// Out-of-line call to int pi_by_2_reduce(double* x, double *y),
+// taken for |arguments| >= 2**63; afterwards dispatch on the size of r.
+// r and c are read back from the stack slots shown below.
+// NOTE(review): prototype says double*, but the slots are accessed with stfe/ldfe - confirm.
+// Frame layout after "add sp=-64,sp":
+// psp sp+64
+// sp+48 -> f0 c
+// r45 sp+32 -> f0 r
+// r44 -> sp+16 -> InputX
+// sp sp -> scratch provided to callee
+
+
+
+.proc __libm_callout_2
+__libm_callout_2:
+L(SINCOS_ARG_TOO_LARGE):
+
+.prologue
+{ .mfi
+ add r45=-32,sp // Parameter: r address (old sp-32, i.e. new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [r45] = f0,16 // Clear Parameter r on stack; r45 advances to sp+48
+ add r44 = 16,sp // Parameter x address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [r45] = f0,-16 // Clear Parameter c on stack; r45 steps back to sp+32
+ nop.i 0
+ nop.b 0
+}
+{ .mib
+ stfe [r44] = FR_Input_X // Store Parameter x on stack
+ nop.i 0
+ br.call.sptk b0=__libm_pi_by_2_reduce# ;; // Call the reduction routine
+};;
+
+
+{ .mii
+ ldfe FR_Input_X =[r44],16 // Get X back off the stack
+//
+// Step the table pointer back 16 bytes so the ldfs pair below picks up Two_to_M3 / Neg_Two_to_M3
+//
+ adds GR_Table_Base1 = -16, GR_Table_Base1
+//
+// N = r8 (presumably the callee's int return value) + sin/cos selector
+//
+ add GR_N_Inc = GR_Sin_or_Cos,r8 ;;
+}
+{ .mmb
+ ldfe FR_r =[r45],16 // Get r off the stack; r45 now points at c
+//
+// Load the positive and negative small-r bounds
+// through the readjusted table pointer
+//
+ ldfs FR_Two_to_M3 = [GR_Table_Base1],4
+ nop.b 999 ;;
+}
+{ .mmb
+ ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
+ ldfe FR_c =[r45] // Get c off the stack
+ nop.b 999 ;;
+}
+
+{ .mfi
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ fcmp.lt.unc.s1 p6, p0 = FR_r, FR_Two_to_M3 // p6 = (r < Two_to_M3)
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ nop.b 0
+};;
+
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.gt.unc.s1 p6, p0 = FR_r, FR_Neg_Two_to_M3 // p6 stays set only if also r > Neg_Two_to_M3
+ nop.i 999 ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(SINCOS_SMALL_R) ;; // r strictly between the two bounds
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+ br.cond.sptk L(SINCOS_NORMAL_R) ;; // otherwise take the normal-r path
+}
+
+.endp __libm_callout_2
+ASM_SIZE_DIRECTIVE(__libm_callout_2)
+
+.type __libm_pi_by_2_reduce#,@function
+.global __libm_pi_by_2_reduce#
+
+
+.type __libm_sin_double_dbx#,@function
+.global __libm_sin_double_dbx#
+.type __libm_cos_double_dbx#,@function
+.global __libm_cos_double_dbx#
diff --git a/sysdeps/ia64/fpu/s_cosf.S b/sysdeps/ia64/fpu/s_cosf.S
new file mode 100644
index 0000000..111d7da
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_cosf.S
@@ -0,0 +1,686 @@
+
+.file "sincosf.s"
+
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+
+
+// History
+//==============================================================
+// 2/02/00 Initial revision
+// 4/02/00 Unwind support added.
+// 5/10/00 Improved speed with new algorithm.
+// 8/08/00 Improved speed by avoiding SIR flush.
+// 8/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 8/30/00 Put sin_of_r before sin_tbl_S_cos_of_r to gain a cycle
+// 1/02/01 Fixed flag settings, improved speed.
+//
+// API
+//==============================================================
+// float sinf( float x);
+// float cosf( float x);
+//
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+
+// SIN_Sin_Flag = p6
+// SIN_Cos_Flag = p7
+
+// integer registers used (stacked registers r32-r39 come from the alloc at entry)
+// Two registers are deliberately shared by two names each; see r33 and r37.
+ SIN_AD_PQ_1 = r33              // walking pointer into sin_coeff_1_table
+ SIN_AD_PQ_2 = r33              // same register: later holds the address of the sin/cos table entry
+ sin_GR_sincos_flag = r34       // 1 when entered via sinf, 0 when entered via cosf
+ sin_GR_Mint = r35              // integer conversion of x, moved out of sin_Mx
+
+ sin_GR_index = r36             // (sin_GR_Mint + 32) << 4, byte offset of the table entry
+ gr_tmp = r37                   // holds -1, used to build fp_tmp
+// gr_tmp and GR_SAVE_B0 share r37; GR_SAVE_B0 is only written in SIN_DOUBLE/COS_DOUBLE.
+ GR_SAVE_B0 = r37               // saved return address around the call to sin/cos
+ GR_SAVE_GP = r38               // saved gp around the call to sin/cos
+ GR_SAVE_PFS = r39              // saved ar.pfs around the call to sin/cos
+
+
+// floating point registers used
+// t denotes r*r throughout, where r = x - (integer conversion of x).
+ sin_coeff_P1 = f32
+ sin_coeff_P2 = f33
+ sin_coeff_Q1 = f34
+ sin_coeff_Q2 = f35
+ sin_coeff_P4 = f36
+ sin_coeff_P5 = f37
+ sin_coeff_Q3 = f38
+ sin_coeff_Q4 = f39
+ sin_Mx = f40                   // fcvt.fx of the input (integer held in the significand)
+ sin_Mfloat = f41               // sin_Mx converted back to floating point
+ sin_tbl_S = f42                // table value multiplying the Q (cosine-series) polynomial
+ sin_tbl_C = f43                // table value multiplying the P (sine-series) polynomial
+ sin_r = f44                    // reduced argument r = x - sin_Mfloat
+ sin_rcube = f45                // r^3
+ sin_tsq = f46                  // t^2 = r^4
+ sin_r7 = f47                   // r^7
+ sin_t = f48                    // t = r^2
+ sin_poly_p2 = f49              // P1 + t*P2
+ sin_poly_p1 = f50              // P4 + t*P5
+ fp_tmp = f51                   // significand of all ones; squared to raise inexact
+ sin_poly_p3 = f52              // P3 + t*sin_poly_p1
+ sin_poly_p4 = f53              // r + r^3*sin_poly_p2
+ sin_of_r = f54                 // sin_poly_p4 + r^7*sin_poly_p3
+ sin_S_t = f55                  // sin_tbl_S * t
+ sin_poly_q2 = f56              // not referenced by the code in this file
+ sin_poly_q1 = f57              // Q1 + t*Q2
+ sin_S_tcube = f58              // not referenced by the code in this file
+ sin_poly_q3 = f59              // Q3 + t*Q4
+ sin_poly_q4 = f60              // not referenced by the code in this file
+ sin_tbl_S_tcube = f61          // sin_tbl_S * t^3
+ sin_tbl_S_cos_of_r = f62       // sin_tbl_S times the full Q polynomial in t
+
+ sin_coeff_Q5 = f63
+ sin_coeff_Q6 = f64
+ sin_coeff_P3 = f65
+
+ sin_poly_q5 = f66              // Q5 + t*Q6
+ sin_poly_q12 = f67             // sin_tbl_S + sin_S_t*sin_poly_q1
+ sin_poly_q3456 = f68           // sin_poly_q3 + t^2*sin_poly_q5
+ fp_tmp2 = f69                  // scratch destination of the dummy multiply that raises underflow
+ SIN_NORM_f8 = f70              // fnorm of the input x
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+sin_coeff_1_table:
+ASM_TYPE_DIRECTIVE(sin_coeff_1_table,@object)
+data8 0xBF56C16C16BF6462 // q3  (rows are read in this order, two doubles per ldfpd)
+data8 0x3EFA01A0128B9EBC // q4
+data8 0xBE927E42FDF33FFE // q5
+data8 0x3E21DA5C72A446F3 // q6
+data8 0x3EC71DD1D5E421A4 // p4
+data8 0xBE5AC5C9D0ACF95A // p5
+data8 0xBFC55555555554CA // p1  (about -1/6: p-coefficients feed the sin(r) polynomial)
+data8 0x3F811111110F2395 // p2
+data8 0xBFE0000000000000 // q1  (exactly -0.5: q-coefficients feed the cos(r) polynomial)
+data8 0x3FA55555555554EF // q2
+data8 0xBF2A01A011232913 // p3  (read alone with ldfd; the 16-byte post-increment skips the pad)
+data8 0x0000000000000000 // pad (keeps the sin/cos pairs below 16-byte aligned)
+// Next: 65 {sin(k), cos(k)} double pairs for integer k = -32..32, 16 bytes each.
+// The code indexes them with (k + 32) << 4 from the first pair.
+/////////////////////////////////////////
+
+data8 0xBFE1A54991426566 //sin(-32)
+data8 0x3FEAB1F5305DE8E5 //cos(-32)
+data8 0x3FD9DBC0B640FC81 //sin(-31)
+data8 0x3FED4591C3E12A20 //cos(-31)
+data8 0x3FEF9DF47F1C903D //sin(-30)
+data8 0x3FC3BE82F2505A52 //cos(-30)
+data8 0x3FE53C7D20A6C9E7 //sin(-29)
+data8 0xBFE7F01658314E47 //cos(-29)
+data8 0xBFD156853B4514D6 //sin(-28)
+data8 0xBFEECDAAD1582500 //cos(-28)
+data8 0xBFEE9AA1B0E5BA30 //sin(-27)
+data8 0xBFD2B266F959DED5 //cos(-27)
+data8 0xBFE866E0FAC32583 //sin(-26)
+data8 0x3FE4B3902691A9ED //cos(-26)
+data8 0x3FC0F0E6F31E809D //sin(-25)
+data8 0x3FEFB7EEF59504FF //cos(-25)
+data8 0x3FECFA7F7919140F //sin(-24)
+data8 0x3FDB25BFB50A609A //cos(-24)
+data8 0x3FEB143CD0247D02 //sin(-23)
+data8 0xBFE10CF7D591F272 //cos(-23)
+data8 0x3F8220A29F6EB9F4 //sin(-22)
+data8 0xBFEFFFADD8D4ACDA //cos(-22)
+data8 0xBFEAC5E20BB0D7ED //sin(-21)
+data8 0xBFE186FF83773759 //cos(-21)
+data8 0xBFED36D8F55D3CE0 //sin(-20)
+data8 0x3FDA1E043964A83F //cos(-20)
+data8 0xBFC32F2D28F584CF //sin(-19)
+data8 0x3FEFA377DE108258 //cos(-19)
+data8 0x3FE8081668131E26 //sin(-18)
+data8 0x3FE52150815D2470 //cos(-18)
+data8 0x3FEEC3C4AC42882B //sin(-17)
+data8 0xBFD19C46B07F58E7 //cos(-17)
+data8 0x3FD26D02085F20F8 //sin(-16)
+data8 0xBFEEA5257E962F74 //cos(-16)
+data8 0xBFE4CF2871CEC2E8 //sin(-15)
+data8 0xBFE84F5D069CA4F3 //cos(-15)
+data8 0xBFEFB30E327C5E45 //sin(-14)
+data8 0x3FC1809AEC2CA0ED //cos(-14)
+data8 0xBFDAE4044881C506 //sin(-13)
+data8 0x3FED09CDD5260CB7 //cos(-13)
+data8 0x3FE12B9AF7D765A5 //sin(-12)
+data8 0x3FEB00DA046B65E3 //cos(-12)
+data8 0x3FEFFFEB762E93EB //sin(-11)
+data8 0x3F7220AE41EE2FDF //cos(-11)
+data8 0x3FE1689EF5F34F52 //sin(-10)
+data8 0xBFEAD9AC890C6B1F //cos(-10)
+data8 0xBFDA6026360C2F91 //sin( -9)
+data8 0xBFED27FAA6A6196B //cos( -9)
+data8 0xBFEFA8D2A028CF7B //sin( -8)
+data8 0xBFC29FBEBF632F94 //cos( -8)
+data8 0xBFE50608C26D0A08 //sin( -7)
+data8 0x3FE81FF79ED92017 //cos( -7)
+data8 0x3FD1E1F18AB0A2C0 //sin( -6)
+data8 0x3FEEB9B7097822F5 //cos( -6)
+data8 0x3FEEAF81F5E09933 //sin( -5)
+data8 0x3FD22785706B4AD9 //cos( -5)
+data8 0x3FE837B9DDDC1EAE //sin( -4)
+data8 0xBFE4EAA606DB24C1 //cos( -4)
+data8 0xBFC210386DB6D55B //sin( -3)
+data8 0xBFEFAE04BE85E5D2 //cos( -3)
+data8 0xBFED18F6EAD1B446 //sin( -2)
+data8 0xBFDAA22657537205 //cos( -2)
+data8 0xBFEAED548F090CEE //sin( -1)
+data8 0x3FE14A280FB5068C //cos( -1)
+data8 0x0000000000000000 //sin( 0)
+data8 0x3FF0000000000000 //cos( 0)
+data8 0x3FEAED548F090CEE //sin( 1)
+data8 0x3FE14A280FB5068C //cos( 1)
+data8 0x3FED18F6EAD1B446 //sin( 2)
+data8 0xBFDAA22657537205 //cos( 2)
+data8 0x3FC210386DB6D55B //sin( 3)
+data8 0xBFEFAE04BE85E5D2 //cos( 3)
+data8 0xBFE837B9DDDC1EAE //sin( 4)
+data8 0xBFE4EAA606DB24C1 //cos( 4)
+data8 0xBFEEAF81F5E09933 //sin( 5)
+data8 0x3FD22785706B4AD9 //cos( 5)
+data8 0xBFD1E1F18AB0A2C0 //sin( 6)
+data8 0x3FEEB9B7097822F5 //cos( 6)
+data8 0x3FE50608C26D0A08 //sin( 7)
+data8 0x3FE81FF79ED92017 //cos( 7)
+data8 0x3FEFA8D2A028CF7B //sin( 8)
+data8 0xBFC29FBEBF632F94 //cos( 8)
+data8 0x3FDA6026360C2F91 //sin( 9)
+data8 0xBFED27FAA6A6196B //cos( 9)
+data8 0xBFE1689EF5F34F52 //sin( 10)
+data8 0xBFEAD9AC890C6B1F //cos( 10)
+data8 0xBFEFFFEB762E93EB //sin( 11)
+data8 0x3F7220AE41EE2FDF //cos( 11)
+data8 0xBFE12B9AF7D765A5 //sin( 12)
+data8 0x3FEB00DA046B65E3 //cos( 12)
+data8 0x3FDAE4044881C506 //sin( 13)
+data8 0x3FED09CDD5260CB7 //cos( 13)
+data8 0x3FEFB30E327C5E45 //sin( 14)
+data8 0x3FC1809AEC2CA0ED //cos( 14)
+data8 0x3FE4CF2871CEC2E8 //sin( 15)
+data8 0xBFE84F5D069CA4F3 //cos( 15)
+data8 0xBFD26D02085F20F8 //sin( 16)
+data8 0xBFEEA5257E962F74 //cos( 16)
+data8 0xBFEEC3C4AC42882B //sin( 17)
+data8 0xBFD19C46B07F58E7 //cos( 17)
+data8 0xBFE8081668131E26 //sin( 18)
+data8 0x3FE52150815D2470 //cos( 18)
+data8 0x3FC32F2D28F584CF //sin( 19)
+data8 0x3FEFA377DE108258 //cos( 19)
+data8 0x3FED36D8F55D3CE0 //sin( 20)
+data8 0x3FDA1E043964A83F //cos( 20)
+data8 0x3FEAC5E20BB0D7ED //sin( 21)
+data8 0xBFE186FF83773759 //cos( 21)
+data8 0xBF8220A29F6EB9F4 //sin( 22)
+data8 0xBFEFFFADD8D4ACDA //cos( 22)
+data8 0xBFEB143CD0247D02 //sin( 23)
+data8 0xBFE10CF7D591F272 //cos( 23)
+data8 0xBFECFA7F7919140F //sin( 24)
+data8 0x3FDB25BFB50A609A //cos( 24)
+data8 0xBFC0F0E6F31E809D //sin( 25)
+data8 0x3FEFB7EEF59504FF //cos( 25)
+data8 0x3FE866E0FAC32583 //sin( 26)
+data8 0x3FE4B3902691A9ED //cos( 26)
+data8 0x3FEE9AA1B0E5BA30 //sin( 27)
+data8 0xBFD2B266F959DED5 //cos( 27)
+data8 0x3FD156853B4514D6 //sin( 28)
+data8 0xBFEECDAAD1582500 //cos( 28)
+data8 0xBFE53C7D20A6C9E7 //sin( 29)
+data8 0xBFE7F01658314E47 //cos( 29)
+data8 0xBFEF9DF47F1C903D //sin( 30)
+data8 0x3FC3BE82F2505A52 //cos( 30)
+data8 0xBFD9DBC0B640FC81 //sin( 31)
+data8 0x3FED4591C3E12A20 //cos( 31)
+data8 0x3FE1A54991426566 //sin( 32)
+data8 0x3FEAB1F5305DE8E5 //cos( 32)
+ASM_SIZE_DIRECTIVE(sin_coeff_1_table)
+
+//////////////////////////////////////////
+
+
+.global sinf
+.global cosf
+#ifdef _LIBC
+.global __sinf
+.global __cosf
+#endif
+
+.text
+.proc cosf
+#ifdef _LIBC
+.proc __cosf
+#endif
+.align 32
+// float cosf(float x): x arrives in f8. This stub only sets up the "cosine"
+// predicates and flag, then jumps to the shared body at L(SINCOSF_COMMON).
+cosf:
+#ifdef _LIBC
+__cosf:
+#endif
+{ .mfi
+ alloc r32 = ar.pfs,1,7,0,0     // 1 input + 7 locals = r32-r39, no outputs
+ fcvt.fx.s1 sin_Mx = f8         // integer conversion of x
+ cmp.ne p6,p7 = r0,r0 // p7 set if cos (r0 != r0 is false, so p6=0, p7=1)
+}
+{ .mfi
+ addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp   // address of the linkage-table slot for the table
+ fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
+ mov sin_GR_sincos_flag = 0x0   // 0 selects cosine when the double-precision fallback is chosen
+}
+;;
+
+{ .mfi
+ ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1]   // fetch the table address from that slot
+ fclass.m.unc p9,p0 = f8, 0x07     // p9 = x is +/-0, used for the cos(0) early exit
+ cmp.ne p8,p0 = r0,r0              // p8 = 0: the sin(0) exit does not apply here
+}
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.sptk L(SINCOSF_COMMON)         // shared code lives after the sinf entry
+}
+;;
+
+.endp cosf
+ASM_SIZE_DIRECTIVE(cosf)
+
+// float sinf(float x) entry, followed by the body shared with cosf.
+.text
+.proc sinf
+#ifdef _LIBC
+.proc __sinf
+#endif
+.align 32
+// x arrives in f8; the result is returned in f8.
+sinf:
+#ifdef _LIBC
+__sinf:
+#endif
+{ .mfi
+ alloc r32 = ar.pfs,1,7,0,0     // 1 input + 7 locals = r32-r39, no outputs
+ fcvt.fx.s1 sin_Mx = f8         // integer conversion of x
+ cmp.eq p6,p7 = r0,r0 // p6 set if sin (r0 == r0 is true, so p6=1, p7=0)
+}
+{ .mfi
+ addl SIN_AD_PQ_1 = @ltoff(sin_coeff_1_table),gp   // address of the linkage-table slot for the table
+ fnorm.s0 SIN_NORM_f8 = f8 // Sets denormal or invalid
+ mov sin_GR_sincos_flag = 0x1   // 1 selects sine when the double-precision fallback is chosen
+}
+;;
+
+{ .mfi
+ ld8 SIN_AD_PQ_1 = [SIN_AD_PQ_1]   // fetch the table address from that slot
+ fclass.m.unc p8,p0 = f8, 0x07     // p8 = x is +/-0, used for the sin(0) early exit
+ cmp.ne p9,p0 = r0,r0              // p9 = 0: the cos(0) exit does not apply here
+}
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.sptk L(SINCOSF_COMMON)
+}
+;;
+
+// Shared body. Writes x = M + r with M the integer conversion of x, then uses
+L(SINCOSF_COMMON):
+// table values for M and polynomials in r. |M| >= 33 goes to double precision.
+// Here with p6 if sin, p7 if cos, p8 if sin(0), p9 if cos(0)
+
+
+{ .mmf
+ ldfpd sin_coeff_Q3, sin_coeff_Q4 = [SIN_AD_PQ_1], 16   // load a coefficient pair, pointer += 16
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x23 // Test for x=inf
+}
+;;
+
+{ .mfb
+ ldfpd sin_coeff_Q5, sin_coeff_Q6 = [SIN_AD_PQ_1], 16
+ fclass.m.unc p10,p0 = f8, 0xc3 // Test for x=nan
+(p8) br.ret.spnt b0 // Exit for sin(0): f8 is returned unchanged
+}
+{ .mfb
+ nop.m 999
+(p9) fma.s f8 = f1,f1,f0          // result = 1.0
+(p9) br.ret.spnt b0 // Exit for cos(0)
+}
+;;
+
+{ .mmf
+ ldfpd sin_coeff_P4, sin_coeff_P5 = [SIN_AD_PQ_1], 16
+ addl gr_tmp = -1,r0               // all-ones pattern for fp_tmp
+ fcvt.xf sin_Mfloat = sin_Mx       // M as a floating-point value
+}
+;;
+
+{ .mfi
+ getf.sig sin_GR_Mint = sin_Mx     // M as an integer in a general register
+(p11) frcpa.s0 f8,p13 = f0,f0 // qnan indef if x=inf
+ nop.i 999
+}
+{ .mfb
+ ldfpd sin_coeff_P1, sin_coeff_P2 = [SIN_AD_PQ_1], 16
+ nop.f 999
+(p11) br.ret.spnt b0 // Exit for x=inf
+}
+;;
+
+{ .mfi
+ ldfpd sin_coeff_Q1, sin_coeff_Q2 = [SIN_AD_PQ_1], 16
+ nop.f 999
+ cmp.ge p8,p9 = -33,sin_GR_Mint    // p8 = (M <= -33), p9 = the opposite
+}
+{ .mfb
+ add sin_GR_index = 32,sin_GR_Mint // bias so that M = -32 maps to entry 0
+(p10) fma.s f8 = f8,f1,f0 // Force qnan if x=nan
+(p10) br.ret.spnt b0 // Exit for x=nan
+}
+;;
+
+{ .mmi
+ ldfd sin_coeff_P3 = [SIN_AD_PQ_1], 16   // the +16 also steps over the pad word
+(p9) cmp.le p8,p0 = 33, sin_GR_Mint      // otherwise p8 = (M >= 33); p8 now means |M| >= 33
+ shl sin_GR_index = sin_GR_index,4       // 16 bytes per {sin,cos} entry
+}
+;;
+
+
+{ .mfi
+ setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
+ fnma.s1 sin_r = f1,sin_Mfloat,SIN_NORM_f8   // r = x - M
+(p8) cmp.eq.unc p11,p12=sin_GR_sincos_flag,r0 // p11 if must call dbl cos
+ // p12 if must call dbl sin
+}
+{ .mbb
+ add SIN_AD_PQ_2 = sin_GR_index,SIN_AD_PQ_1  // address of the table entry for M (same register, r33)
+(p11) br.cond.spnt COS_DOUBLE
+(p12) br.cond.spnt SIN_DOUBLE
+}
+;;
+
+.pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
+{ .mmi
+(p6) ldfpd sin_tbl_S,sin_tbl_C = [SIN_AD_PQ_2]   // sin: S = sin(M), C = cos(M)
+(p7) ldfpd sin_tbl_C,sin_tbl_S = [SIN_AD_PQ_2]   // cos: roles swapped, C = sin(M), S = cos(M)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p8,p0 = f8, 0x0b // If sin, note denormal input to set uflow
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_t = sin_r,sin_r,f0              // t = r^2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_rcube = sin_t,sin_r,f0          // r^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_tsq = sin_t,sin_t,f0            // t^2 = r^4
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_q3 = sin_t,sin_coeff_Q4,sin_coeff_Q3   // Q3 + t*Q4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_q5 = sin_t,sin_coeff_Q6,sin_coeff_Q5   // Q5 + t*Q6
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_p1 = sin_t,sin_coeff_P5,sin_coeff_P4   // P4 + t*P5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_p2 = sin_t,sin_coeff_P2,sin_coeff_P1   // P1 + t*P2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_q1 = sin_t,sin_coeff_Q2,sin_coeff_Q1   // Q1 + t*Q2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_S_t = sin_t,sin_tbl_S,f0                    // S*t
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fmpy.s.s0 fp_tmp2 = f8,f8 // Dummy mult to set underflow if sin(denormal)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_r7 = sin_rcube,sin_tsq,f0                   // r^7
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_q3456 = sin_tsq,sin_poly_q5,sin_poly_q3   // Q3 + Q4*t + Q5*t^2 + Q6*t^3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_p3 = sin_t,sin_poly_p1,sin_coeff_P3    // P3 + P4*t + P5*t^2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_p4 = sin_rcube,sin_poly_p2,sin_r       // r + r^3*(P1 + P2*t)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 sin_tbl_S_tcube = sin_S_t,sin_tsq,f0            // S*t^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 sin_poly_q12 = sin_S_t,sin_poly_q1,sin_tbl_S    // S*(1 + Q1*t + Q2*t^2)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.d.s1 sin_of_r = sin_r7,sin_poly_p3,sin_poly_p4     // sin(r) polynomial: p4 + r^7*p3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.d.s1 sin_tbl_S_cos_of_r = sin_tbl_S_tcube,sin_poly_q3456,sin_poly_q12   // S * cos(r) polynomial
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999
+}
+;;
+
+// Final combine, rounded to single: sin uses +C*sin(r), cos uses -C*sin(r).
+.pred.rel "mutex",p6,p7 //SIN_Sin_Flag, SIN_Cos_Flag
+{ .mfi
+ nop.m 999
+//(SIN_Sin_Flag) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
+(p6) fma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r    // cos(M)*sin(r) + sin(M)*cos(r)
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//(SIN_Cos_Flag) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
+(p7) fnma.s f8 = sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r   // -sin(M)*sin(r) + cos(M)*cos(r)
+ br.ret.sptk b0
+}
+
+.endp sinf
+ASM_SIZE_DIRECTIVE(sinf)
+
+// Fallback for sinf, entered by branch (not call) when |integer part of x| >= 33.
+.proc SIN_DOUBLE
+SIN_DOUBLE:
+.prologue
+{ .mfi
+ nop.m 0
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs            // preserve ar.pfs across the call below
+}
+;;
+
+{ .mfi
+ mov GR_SAVE_GP=gp                 // preserve gp across the call below
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0                 // b0 is still the return address of sinf's caller
+}
+
+.body
+{ .mmb
+ nop.m 999
+ nop.m 999
+ br.call.sptk.many b0=sin          // double-precision sin; f8 has not been written on this path
+}
+;;
+
+{ .mfi
+ mov gp = GR_SAVE_GP
+ nop.f 999
+ mov b0 = GR_SAVE_B0
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s f8 = f8,f1,f0               // round the returned value to single precision
+(p0) mov ar.pfs = GR_SAVE_PFS
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p0) br.ret.sptk b0                // return to the original caller
+}
+;;
+
+.endp SIN_DOUBLE
+ASM_SIZE_DIRECTIVE(SIN_DOUBLE)
+
+// Fallback for cosf, entered by branch (not call) when |integer part of x| >= 33.
+.proc COS_DOUBLE
+COS_DOUBLE:
+.prologue
+{ .mfi
+ nop.m 0
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs            // preserve ar.pfs across the call below
+}
+;;
+
+{ .mfi
+ mov GR_SAVE_GP=gp                 // preserve gp across the call below
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0                 // b0 is still the return address of cosf's caller
+}
+
+.body
+{ .mmb
+ nop.m 999
+ nop.m 999
+ br.call.sptk.many b0=cos          // double-precision cos; f8 has not been written on this path
+}
+;;
+
+{ .mfi
+ mov gp = GR_SAVE_GP
+ nop.f 999
+ mov b0 = GR_SAVE_B0
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s f8 = f8,f1,f0               // round the returned value to single precision
+(p0) mov ar.pfs = GR_SAVE_PFS
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p0) br.ret.sptk b0                // return to the original caller
+}
+;;
+
+.endp COS_DOUBLE
+ASM_SIZE_DIRECTIVE(COS_DOUBLE)
+
+
+
+.type sin,@function
+.global sin
+.type cos,@function
+.global cos
diff --git a/sysdeps/ia64/fpu/s_cosl.S b/sysdeps/ia64/fpu/s_cosl.S
new file mode 100644
index 0000000..a14ef5b
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_cosl.S
@@ -0,0 +1,2506 @@
+.file "sincosl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+// 2/02/2000 (hand-optimized)
+// 4/04/00 Unwind support added
+//
+// *********************************************************************
+//
+// Function: Combined sinl(x) and cosl(x), where
+//
+// sinl(x) = sine(x), for double-extended precision x values
+// cosl(x) = cosine(x), for double-extended precision x values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f32-f99
+//
+// General Purpose Registers:
+// r32-r43
+// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
+//
+// Predicate Registers: p6-p13
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions do not occur
+// Underflow exceptions raised when appropriate for sin
+// (No specialized error handling for this routine)
+// Inexact raised when appropriate by algorithm
+//
+// sinl(SNaN) = QNaN
+// sinl(QNaN) = QNaN
+// sinl(inf) = QNaN
+// sinl(+/-0) = +/-0
+// cosl(inf) = QNaN
+// cosl(SNaN) = QNaN
+// cosl(QNaN) = QNaN
+// cosl(0) = 1
+//
+// *********************************************************************
+//
+// Mathematical Description
+// ========================
+//
+// The computation of FSIN and FCOS is best handled in one piece of
+// code. The main reason is that given any argument Arg, computation
+// of trigonometric functions first calculates N and an approximation
+// to alpha where
+//
+// Arg = N pi/2 + alpha, |alpha| <= pi/4.
+//
+// Since
+//
+// cosl( Arg ) = sinl( (N+1) pi/2 + alpha ),
+//
+// therefore, the code for computing sine will produce cosine as long
+// as 1 is added to N immediately after the argument reduction
+// process.
+//
+// Let M = N if sine
+// N+1 if cosine.
+//
+// Now, given
+//
+// Arg = M pi/2 + alpha, |alpha| <= pi/4,
+//
+// let I = M mod 4, or I be the two lsb of M when M is represented
+// as 2's complement. I = [i_0 i_1]. Then
+//
+// sinl( Arg ) = (-1)^i_0 sinl( alpha ) if i_1 = 0,
+// = (-1)^i_0 cosl( alpha ) if i_1 = 1.
+//
+// For example:
+// if M = -1, I = 11
+// sin (-pi/2 + alpha) = (-1) cos (alpha)
+// if M = 0, I = 00
+// sin (alpha) = sin (alpha)
+// if M = 1, I = 01
+// sin (pi/2 + alpha) = cos (alpha)
+// if M = 2, I = 10
+// sin (pi + alpha) = (-1) sin (alpha)
+// if M = 3, I = 11
+// sin ((3/2)pi + alpha) = (-1) cos (alpha)
+//
+// The value of alpha is obtained by argument reduction and
+// represented by two working precision numbers r and c where
+//
+// alpha = r + c accurately.
+//
+// The reduction method is described in a previous write up.
+// The argument reduction scheme identifies 4 cases. For Cases 2
+// and 4, because |alpha| is small, sinl(r+c) and cosl(r+c) can be
+// computed very easily by 2 or 3 terms of the Taylor series
+// expansion as follows:
+//
+// Case 2:
+// -------
+//
+// sinl(r + c) = r + c - r^3/6 accurately
+// cosl(r + c) = 1 - 2^(-67) accurately
+//
+// Case 4:
+// -------
+//
+// sinl(r + c) = r + c - r^3/6 + r^5/120 accurately
+// cosl(r + c) = 1 - r^2/2 + r^4/24 accurately
+//
+// The only cases left are Cases 1 and 3 of the argument reduction
+// procedure. These two cases will be merged since after the
+// argument is reduced in either cases, we have the reduced argument
+// represented as r + c and that the magnitude |r + c| is not small
+// enough to allow the usage of a very short approximation.
+//
+// The required calculation is either
+//
+// sinl(r + c) = sinl(r) + correction, or
+// cosl(r + c) = cosl(r) + correction.
+//
+// Specifically,
+//
+// sinl(r + c) = sinl(r) + c sin'(r) + O(c^2)
+// = sinl(r) + c cos (r) + O(c^2)
+// = sinl(r) + c(1 - r^2/2) accurately.
+// Similarly,
+//
+// cosl(r + c) = cosl(r) - c sinl(r) + O(c^2)
+// = cosl(r) - c(r - r^3/6) accurately.
+//
+// We therefore concentrate on accurately calculating sinl(r) and
+// cosl(r) for a working-precision number r, |r| <= pi/4 to within
+// 0.1% or so.
+//
+// The greatest challenge of this task is that the second terms of
+// the Taylor series
+//
+// r - r^3/3! + r^5/5! - ...
+//
+// and
+//
+// 1 - r^2/2! + r^4/4! - ...
+//
+// are not very small when |r| is close to pi/4 and the rounding
+// errors will be a concern if simple polynomial accumulation is
+// used. When |r| < 2^-3, however, the second terms will be small
+// enough (6 bits or so of right shift) that a normal Horner
+// recurrence suffices. Hence there are two cases that we consider
+// in the accurate computation of sinl(r) and cosl(r), |r| <= pi/4.
+//
+// Case small_r: |r| < 2^(-3)
+// --------------------------
+//
+// Since Arg = M pi/2 + r + c accurately, and M mod 4 is [i_0 i_1],
+// we have
+//
+// sinl(Arg) = (-1)^i_0 * sinl(r + c) if i_1 = 0
+// = (-1)^i_0 * cosl(r + c) if i_1 = 1
+//
+// can be accurately approximated by
+//
+// sinl(Arg) = (-1)^i_0 * [sinl(r) + c] if i_1 = 0
+// = (-1)^i_0 * [cosl(r) - c*r] if i_1 = 1
+//
+// because |r| is small and thus the second terms in the correction
+// are unnecessary.
+//
+// Finally, sinl(r) and cosl(r) are approximated by polynomials of
+// moderate lengths.
+//
+// sinl(r) = r + S_1 r^3 + S_2 r^5 + ... + S_5 r^11
+// cosl(r) = 1 + C_1 r^2 + C_2 r^4 + ... + C_5 r^10
+//
+// We can make use of predicates to selectively calculate
+// sinl(r) or cosl(r) based on i_1.
+//
+// Case normal_r: 2^(-3) <= |r| <= pi/4
+// ------------------------------------
+//
+// This case is more likely than the previous one if one considers
+// r to be uniformly distributed in [-pi/4 pi/4]. Again,
+//
+// sinl(Arg) = (-1)^i_0 * sinl(r + c) if i_1 = 0
+// = (-1)^i_0 * cosl(r + c) if i_1 = 1.
+//
+// Because |r| is now larger, we need one extra term in the
+// correction. sinl(Arg) can be accurately approximated by
+//
+// sinl(Arg) = (-1)^i_0 * [sinl(r) + c(1-r^2/2)] if i_1 = 0
+// = (-1)^i_0 * [cosl(r) - c*r*(1 - r^2/6)] i_1 = 1.
+//
+// Finally, sinl(r) and cosl(r) are approximated by polynomials of
+// moderate lengths.
+//
+// sinl(r) = r + PP_1_hi r^3 + PP_1_lo r^3 +
+// PP_2 r^5 + ... + PP_8 r^17
+//
+// cosl(r) = 1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
+//
+// where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
+// The crux in accurate computation is to calculate
+//
+// r + PP_1_hi r^3 or 1 + QQ_1 r^2
+//
+// accurately as two pieces: U_hi and U_lo. The way to achieve this
+// is to obtain r_hi as a 10 sig. bit number that approximates r to
+// roughly 8 bits or so of accuracy. (One convenient way is
+//
+// r_hi := frcpa( frcpa( r ) ).)
+//
+// This way,
+//
+// r + PP_1_hi r^3 = r + PP_1_hi r_hi^3 +
+// PP_1_hi (r^3 - r_hi^3)
+// = [r + PP_1_hi r_hi^3] +
+// [PP_1_hi (r - r_hi)
+// (r^2 + r_hi r + r_hi^2) ]
+// = U_hi + U_lo
+//
+// Since r_hi is only 10 bit long and PP_1_hi is only 16 bit long,
+// PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
+// exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
+// and that there is no more than 8 bit shift off between r and
+// PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
+// calculated without any error. Finally, the fact that
+//
+// |U_lo| <= 2^(-8) |U_hi|
+//
+// says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
+// 8 extra bits of accuracy.
+//
+// Similarly,
+//
+// 1 + QQ_1 r^2 = [1 + QQ_1 r_hi^2] +
+// [QQ_1 (r - r_hi)(r + r_hi)]
+// = U_hi + U_lo.
+//
+// Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
+//
+// If i_1 = 0, then
+//
+// U_hi := r + PP_1_hi * r_hi^3
+// U_lo := PP_1_hi * (r - r_hi) * (r^2 + r*r_hi + r_hi^2)
+// poly := PP_1_lo r^3 + PP_2 r^5 + ... + PP_8 r^17
+// correction := c * ( 1 + C_1 r^2 )
+//
+// Else ...i_1 = 1
+//
+// U_hi := 1 + QQ_1 * r_hi * r_hi
+// U_lo := QQ_1 * (r - r_hi) * (r + r_hi)
+// poly := QQ_2 * r^4 + QQ_3 * r^6 + ... + QQ_8 r^16
+// correction := -c * r * (1 + S_1 * r^2)
+//
+// End
+//
+// Finally,
+//
+// V := poly + ( U_lo + correction )
+//
+// / U_hi + V if i_0 = 0
+// result := |
+// \ (-U_hi) - V if i_0 = 1
+//
+// It is important that in the last step, negation of U_hi is
+// performed prior to the subtraction which is to be performed in
+// the user-set rounding mode.
+//
+//
+// Algorithmic Description
+// =======================
+//
+// The argument reduction algorithm is tightly integrated into FSIN
+// and FCOS which share the same code. The following is complete and
+// self-contained. The argument reduction description given
+// previously is repeated below.
+//
+//
+// Step 0. Initialization.
+//
+// If FSIN is invoked, set N_inc := 0; else if FCOS is invoked,
+// set N_inc := 1.
+//
+// Step 1. Check for exceptional and special cases.
+//
+// * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
+// handling.
+// * If |Arg| < 2^24, go to Step 2 for reduction of moderate
+// arguments. This is the most likely case.
+// * If |Arg| < 2^63, go to Step 8 for pre-reduction of large
+// arguments.
+// * If |Arg| >= 2^63, go to Step 10 for special handling.
+//
+// Step 2. Reduction of moderate arguments.
+//
+// If |Arg| < pi/4 ...quick branch
+// N_fix := N_inc (integer)
+// r := Arg
+// c := 0.0
+// Branch to Step 4, Case_1_complete
+// Else ...cf. argument reduction
+// N := Arg * two_by_PI (fp)
+// N_fix := fcvt.fx( N ) (int)
+// N := fcvt.xf( N_fix )
+// N_fix := N_fix + N_inc
+// s := Arg - N * P_1 (first piece of pi/2)
+// w := -N * P_2 (second piece of pi/2)
+//
+// If |s| >= 2^(-33)
+// go to Step 3, Case_1_reduce
+// Else
+// go to Step 7, Case_2_reduce
+// Endif
+// Endif
+//
+// Step 3. Case_1_reduce.
+//
+// r := s + w
+// c := (s - r) + w ...observe order
+//
+// Step 4. Case_1_complete
+//
+// ...At this point, the reduced argument alpha is
+// ...accurately represented as r + c.
+// If |r| < 2^(-3), go to Step 6, small_r.
+//
+// Step 5. Normal_r.
+//
+// Let [i_0 i_1] be the 2 lsb of N_fix.
+// FR_rsq := r * r
+// r_hi := frcpa( frcpa( r ) )
+// r_lo := r - r_hi
+//
+// If i_1 = 0, then
+// poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))
+// U_hi := r + PP_1_hi*r_hi*r_hi*r_hi ...any order
+// U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)
+// correction := c + c*C_1*FR_rsq ...any order
+// Else
+// poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))
+// U_hi := 1 + QQ_1 * r_hi * r_hi ...any order
+// U_lo := QQ_1 * r_lo * (r + r_hi)
+// correction := -c*(r + S_1*FR_rsq*r) ...any order
+// Endif
+//
+// V := poly + (U_lo + correction) ...observe order
+//
+// result := (i_0 == 0? 1.0 : -1.0)
+//
+// Last instruction in user-set rounding mode
+//
+// result := (i_0 == 0? result*U_hi + V :
+// result*U_hi - V)
+//
+// Return
+//
+// Step 6. Small_r.
+//
+// ...Use flush to zero mode without causing exception
+// Let [i_0 i_1] be the two lsb of N_fix.
+//
+// FR_rsq := r * r
+//
+// If i_1 = 0 then
+// z := FR_rsq*FR_rsq; z := FR_rsq*z *r
+// poly_lo := S_3 + FR_rsq*(S_4 + FR_rsq*S_5)
+// poly_hi := r*FR_rsq*(S_1 + FR_rsq*S_2)
+// correction := c
+// result := r
+// Else
+// z := FR_rsq*FR_rsq; z := FR_rsq*z
+// poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)
+// poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
+// correction := -c*r
+// result := 1
+// Endif
+//
+// poly := poly_hi + (z * poly_lo + correction)
+//
+// If i_0 = 1, result := -result
+//
+// Last operation. Perform in user-set rounding mode
+//
+// result := (i_0 == 0? result + poly :
+// result - poly )
+// Return
+//
+// Step 7. Case_2_reduce.
+//
+// ...Refer to the write up for argument reduction for
+// ...rationale. The reduction algorithm below is taken from
+// ...argument reduction description and integrated here.
+//
+// w := N*P_3
+// U_1 := N*P_2 + w ...FMA
+// U_2 := (N*P_2 - U_1) + w ...2 FMA
+// ...U_1 + U_2 is N*(P_2+P_3) accurately
+//
+// r := s - U_1
+// c := ( (s - r) - U_1 ) - U_2
+//
+// ...The mathematical sum r + c approximates the reduced
+// ...argument accurately. Note that although compared to
+// ...Case 1, this case requires much more work to reduce
+// ...the argument, the subsequent calculation needed for
+// ...any of the trigonometric function is very little because
+// ...|alpha| < 1.01*2^(-33) and thus two terms of the
+// ...Taylor series expansion suffice.
+//
+// If i_1 = 0 then
+// poly := c + S_1 * r * r * r ...any order
+// result := r
+// Else
+// poly := -2^(-67)
+// result := 1.0
+// Endif
+//
+// If i_0 = 1, result := -result
+//
+// Last operation. Perform in user-set rounding mode
+//
+// result := (i_0 == 0? result + poly :
+// result - poly )
+//
+// Return
+//
+//
+// Step 8. Pre-reduction of large arguments.
+//
+// ...Again, the following reduction procedure was described
+// ...in the separate write up for argument reduction, which
+// ...is tightly integrated here.
+
+// N_0 := Arg * Inv_P_0
+// N_0_fix := fcvt.fx( N_0 )
+// N_0 := fcvt.xf( N_0_fix)
+
+// Arg' := Arg - N_0 * P_0
+// w := N_0 * d_1
+// N := Arg' * two_by_PI
+// N_fix := fcvt.fx( N )
+// N := fcvt.xf( N_fix )
+// N_fix := N_fix + N_inc
+//
+// s := Arg' - N * P_1
+// w := w - N * P_2
+//
+// If |s| >= 2^(-14)
+// go to Step 3
+// Else
+// go to Step 9
+// Endif
+//
+// Step 9. Case_4_reduce.
+//
+// ...first obtain N_0*d_1 and -N*P_2 accurately
+// U_hi := N_0 * d_1 V_hi := -N*P_2
+// U_lo := N_0 * d_1 - U_hi V_lo := -N*P_2 - V_hi ...FMAs
+//
+// ...compute the contribution from N_0*d_2 and -N*P_3
+// w := -N*P_3
+// w := w + N_0*d_2
+// t := U_lo + V_lo + w ...any order
+//
+// ...at this point, the mathematical value
+// ...s + U_hi + V_hi + t approximates the true reduced argument
+// ...accurately. Just need to compute this accurately.
+//
+// ...Calculate U_hi + V_hi accurately:
+// A := U_hi + V_hi
+// if |U_hi| >= |V_hi| then
+// a := (U_hi - A) + V_hi
+// else
+// a := (V_hi - A) + U_hi
+// endif
+// ...order in computing "a" must be observed. This branch is
+// ...best implemented by predicates.
+// ...A + a is U_hi + V_hi accurately. Moreover, "a" is
+// ...much smaller than A: |a| <= (1/2)ulp(A).
+//
+// ...Just need to calculate s + A + a + t
+// C_hi := s + A t := t + a
+// C_lo := (s - C_hi) + A
+// C_lo := C_lo + t
+//
+// ...Final steps for reduction
+// r := C_hi + C_lo
+// c := (C_hi - r) + C_lo
+//
+// ...At this point, we have r and c
+// ...And all we need is a couple of terms of the corresponding
+// ...Taylor series.
+//
+// If i_1 = 0
+// poly := c + r*FR_rsq*(S_1 + FR_rsq*S_2)
+// result := r
+// Else
+// poly := FR_rsq*(C_1 + FR_rsq*C_2)
+// result := 1
+// Endif
+//
+// If i_0 = 1, result := -result
+//
+// Last operation. Perform in user-set rounding mode
+//
+// result := (i_0 == 0? result + poly :
+// result - poly )
+// Return
+//
+// Large Arguments: For arguments above 2**63, a Payne-Hanek
+// style argument reduction is used and pi_by_2 reduce is called.
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 64
+// Table layout: each row is four data4 words (16 bytes; the final row has three), so row k sits at byte offset 16*k. The code reaches rows through hard-coded offsets and post-increment loads, so rows must not be added, removed or reordered without updating those offsets.
+FSINCOSL_CONSTANTS:
+ASM_TYPE_DIRECTIVE(FSINCOSL_CONSTANTS,@object)
+data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24 (offset 0; pair read with ldfs)
+data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2 (offset 16)
+data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0 (offset 32)
+data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1 (offset 48)
+data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2 (offset 64)
+data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3 (offset 80)
+data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63 (offset 96; initial value of GR_Table_Base1; pair read with ldfs)
+data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0 (offset 112)
+data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1 (offset 128)
+data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2 (offset 144)
+data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4 (offset 160)
+data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4 (offset 176)
+data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3 (offset 192; pair read with ldfs)
+data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67 (offset 208; each read with ldfs)
+data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8 (offset 224; start of the run read in SINCOSL_NORMAL_R when i_1 == 0)
+data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7 (offset 240)
+data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6 (offset 256)
+data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5 (offset 272)
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1 (offset 288; copy read in sequence with the PP_* rows)
+data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi (offset 304; loaded into FR_PP_1)
+data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4 (offset 320)
+data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3 (offset 336)
+data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2 (offset 352)
+data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lo (offset 368)
+data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8 (offset 384; start of the run read in SINCOSL_NORMAL_R when i_1 != 0)
+data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7 (offset 400)
+data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6 (offset 416)
+data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5 (offset 432)
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1 (offset 448; copy read in sequence with the QQ_* rows)
+data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1 (offset 464)
+data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4 (offset 480)
+data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3 (offset 496)
+data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2 (offset 512)
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1 (offset 528; first row of the C_1..C_5 run)
+data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2 (offset 544)
+data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3 (offset 560)
+data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4 (offset 576)
+data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5 (offset 592; SINCOSL_SMALL_R walks C_5..C_1 backwards from here)
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1 (offset 608; first row of the S_1..S_5 run)
+data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2 (offset 624)
+data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3 (offset 640)
+data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4 (offset 656)
+data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5 (offset 672; SINCOSL_SMALL_R walks S_5..S_1 backwards from here)
+data4 0x38800000, 0xB8800000, 0x00000000 // two**-14, -two**-14 (offset 688; pair read with ldfs in SINCOSL_LARGER_ARG)
+ASM_SIZE_DIRECTIVE(FSINCOSL_CONSTANTS)
+
+FR_Input_X = f8 // read as the argument at entry; overwritten with the result before each br.ret
+FR_Neg_Two_to_M3 = f32 // shares f32 with FR_Two_to_63
+FR_Two_to_63 = f32 // shares f32 with FR_Neg_Two_to_M3
+FR_Two_to_24 = f33 // shares f33 with FR_Pi_by_4
+FR_Pi_by_4 = f33 // shares f33 with FR_Two_to_24
+FR_Two_to_M14 = f34
+FR_Two_to_M33 = f35
+FR_Neg_Two_to_24 = f36 // shares f36 with FR_Neg_Pi_by_4
+FR_Neg_Pi_by_4 = f36 // shares f36 with FR_Neg_Two_to_24
+FR_Neg_Two_to_M14 = f37
+FR_Neg_Two_to_M33 = f38
+FR_Neg_Two_to_M67 = f39
+FR_Inv_pi_by_2 = f40
+FR_N_float = f41
+FR_N_fix = f42
+FR_P_1 = f43
+FR_P_2 = f44
+FR_P_3 = f45
+FR_s = f46
+FR_w = f47
+FR_c = f48
+FR_r = f49
+FR_Z = f50
+FR_A = f51
+FR_a = f52
+FR_t = f53
+FR_U_1 = f54
+FR_U_2 = f55
+FR_C_1 = f56
+FR_C_2 = f57
+FR_C_3 = f58
+FR_C_4 = f59
+FR_C_5 = f60
+FR_S_1 = f61
+FR_S_2 = f62
+FR_S_3 = f63
+FR_S_4 = f64
+FR_S_5 = f65
+FR_poly_hi = f66
+FR_poly_lo = f67
+FR_r_hi = f68
+FR_r_lo = f69
+FR_rsq = f70
+FR_r_cubed = f71
+FR_C_hi = f72
+FR_N_0 = f73
+FR_d_1 = f74
+FR_V = f75 // shares f75 with FR_V_hi
+FR_V_hi = f75 // shares f75 with FR_V
+FR_V_lo = f76
+FR_U_hi = f77
+FR_U_lo = f78
+FR_U_hiabs = f79
+FR_V_hiabs = f80
+FR_PP_8 = f81 // each FR_PP_n shares its register with FR_QQ_n; SINCOSL_NORMAL_R loads one set or the other depending on i_1
+FR_QQ_8 = f81
+FR_PP_7 = f82
+FR_QQ_7 = f82
+FR_PP_6 = f83
+FR_QQ_6 = f83
+FR_PP_5 = f84
+FR_QQ_5 = f84
+FR_PP_4 = f85
+FR_QQ_4 = f85
+FR_PP_3 = f86
+FR_QQ_3 = f86
+FR_PP_2 = f87
+FR_QQ_2 = f87
+FR_QQ_1 = f88
+FR_N_0_fix = f89
+FR_Inv_P_0 = f90
+FR_corr = f91
+FR_poly = f92
+FR_d_2 = f93
+FR_Two_to_M3 = f94 // shares f94 with FR_Neg_Two_to_63
+FR_Neg_Two_to_63 = f94 // shares f94 with FR_Two_to_M3
+FR_P_0 = f95
+FR_C_lo = f96
+FR_PP_1 = f97
+FR_PP_1_lo = f98
+FR_ArgPrime = f99
+// General registers used by the sinl/cosl body.
+GR_Table_Base = r32 // address of FSINCOSL_CONSTANTS; also the destination of alloc's ar.pfs copy at both entry points
+GR_Table_Base1 = r33 // second table cursor, started at FSINCOSL_CONSTANTS + 96
+GR_i_0 = r34 // bit 1 of GR_N_Inc
+GR_i_1 = r35 // bit 0 of GR_N_Inc
+GR_N_Inc = r36 // integer N plus GR_Sin_or_Cos (just GR_Sin_or_Cos when |x| < pi/4)
+GR_Sin_or_Cos = r37 // 0 when entered through sinl, 1 when entered through cosl
+
+// Added for unwind support
+
+GR_SAVE_B0 = r39 // SINCOSL_CONTINUE copies b0 here (written there as a literal r39)
+GR_SAVE_GP = r40
+GR_SAVE_PFS = r41
+
+
+.global sinl#
+.global cosl#
+#ifdef _LIBC
+.global __sinl#
+.global __cosl#
+#endif
+
+.section .text
+.proc sinl#
+#ifdef _LIBC
+.proc __sinl#
+#endif
+.align 64
+sinl:
+#ifdef _LIBC
+__sinl:
+#endif
+{ .mlx
+alloc GR_Table_Base = ar.pfs,0,12,2,0 // frame: 0 inputs, 12 locals, 2 outputs, 0 rotating. NOTE(review): the ar.pfs copy in GR_Table_Base is overwritten by the addl below
+(p0) movl GR_Sin_or_Cos = 0x0 ;; // 0 marks the sine entry; the shared body adds this value to N
+}
+// sinl entry stub: set the sine selector, load the address of FSINCOSL_CONSTANTS, then branch to the body shared with cosl.
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp // gp-relative address of the linkage-table slot for the table
+ nop.i 999
+}
+;;
+
+{ .mmb
+ ld8 GR_Table_Base = [GR_Table_Base] // read the table address out of that slot
+ nop.m 999
+(p0) br.cond.sptk L(SINCOSL_CONTINUE) ;; // join the code that follows the cosl prologue
+}
+;;
+
+
+.endp sinl#
+ASM_SIZE_DIRECTIVE(sinl#)
+
+.section .text
+.proc cosl#
+cosl:
+#ifdef _LIBC
+.proc __cosl#
+__cosl:
+#endif
+{ .mlx
+alloc GR_Table_Base= ar.pfs,0,12,2,0
+(p0) movl GR_Sin_or_Cos = 0x1 ;;
+}
+;;
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmb
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.b 999
+}
+;;
+
+
+
+//
+// Load Table Address
+//
+
+L(SINCOSL_CONTINUE):
+{ .mmi
+(p0) add GR_Table_Base1 = 96, GR_Table_Base
+(p0) ldfs FR_Two_to_24 = [GR_Table_Base], 4
+// GR_Sin_or_Cos denotes the entry point taken: 0 for sinl, 1 for cosl
+(p0) mov r39 = b0 ;;
+}
+{ .mmi
+ nop.m 0
+//
+// Load 2**24, load 2**63.
+//
+(p0) ldfs FR_Neg_Two_to_24 = [GR_Table_Base], 12
+ nop.i 0
+}
+{ .mfi
+(p0) ldfs FR_Two_to_63 = [GR_Table_Base1], 4
+//
+// Check for unnormals - unsupported operands. We do not want
+// to generate denormal exception
+// Check for NatVals, QNaNs, SNaNs, +/-Infs
+// Check for EM unsupporteds
+// Check for Zero
+//
+(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
+ nop.i 0
+};;
+{ .mmf
+ nop.m 999
+(p0) ldfs FR_Neg_Two_to_63 = [GR_Table_Base1], 12
+(p0) fclass.nm.unc p8, p0 = FR_Input_X, 0x1FF
+}
+{ .mfb
+ nop.m 999
+(p0) fclass.m.unc p10, p0 = FR_Input_X, 0x007
+(p6) br.cond.spnt L(SINCOSL_SPECIAL) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(SINCOSL_SPECIAL) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Branch if +/- NaN, Inf.
+// Load -2**24, load -2**63.
+//
+(p10) br.cond.spnt L(SINCOSL_ZERO) ;;
+}
+{ .mmb
+(p0) ldfe FR_Inv_pi_by_2 = [GR_Table_Base], 16
+(p0) ldfe FR_Inv_P_0 = [GR_Table_Base1], 16
+ nop.b 999 ;;
+}
+{ .mmb
+(p0) ldfe FR_d_1 = [GR_Table_Base1], 16
+//
+// Raise possible denormal operand flag with useful fcmp
+// Is x <= -2**63
+// Load Inv_P_0 for pre-reduction
+// Load Inv_pi_by_2
+//
+(p0) ldfe FR_P_0 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mmb
+(p0) ldfe FR_d_2 = [GR_Table_Base1], 16
+//
+// Load P_0
+// Load d_1
+// Is x >= 2**63
+// Is x <= -2**24?
+//
+(p0) ldfe FR_P_1 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+//
+// Load P_1
+// Load d_2
+// Is x >= 2**24?
+//
+{ .mfi
+(p0) ldfe FR_P_2 = [GR_Table_Base], 16
+(p0) fcmp.le.unc.s1 p7, p8 = FR_Input_X, FR_Neg_Two_to_24
+ nop.i 999 ;;
+}
+{ .mbb
+(p0) ldfe FR_P_3 = [GR_Table_Base], 16
+ nop.b 999
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
+ nop.i 999
+}
+{ .mfi
+(p0) ldfe FR_Pi_by_4 = [GR_Table_Base1], 16
+//
+// Branch if +/- zero.
+// Decide about the paths to take:
+// If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2
+// OTHERWISE - CASE 3 OR 4
+//
+(p0) fcmp.le.unc.s0 p10, p11 = FR_Input_X, FR_Neg_Two_to_63
+ nop.i 999 ;;
+}
+{ .mmi
+(p0) ldfe FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
+(p0) ldfs FR_Two_to_M3 = [GR_Table_Base1], 4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63
+ nop.i 999 ;;
+}
+{ .mib
+(p0) ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
+ nop.i 999
+//
+// Load P_2
+// Load P_3
+// Load pi_by_4
+// Load neg_pi_by_4
+// Load 2**(-3)
+// Load -2**(-3).
+//
+(p10) br.cond.spnt L(SINCOSL_ARG_TOO_LARGE) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Branch out if x >= 2**63. Use Payne-Hanek Reduction
+//
+(p7) br.cond.spnt L(SINCOSL_LARGER_ARG) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
+//
+(p0) fma.s1 FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Select the case when |Arg| < pi/4
+// Else Select the case when |Arg| >= pi/4
+//
+(p0) fcvt.fx.s1 FR_N_fix = FR_N_float
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N = Arg * 2/pi
+// Check if Arg < pi/4
+//
+(p6) fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
+ nop.i 999 ;;
+}
+//
+// Case 2: Convert integer N_fix back to normalized floating-point value.
+// Case 1: p8 is only affected when p6 is set
+//
+{ .mfi
+(p7) ldfs FR_Two_to_M33 = [GR_Table_Base1], 4
+//
+// Grab the integer part of N and call it N_fix
+//
+(p6) fmerge.se FR_r = FR_Input_X, FR_Input_X
+// If |x| < pi/4, r = x and c = 0
+// If |x| < pi/4, is x < 2**(-3).
+// r = Arg
+// c = 0
+(p6) mov GR_N_Inc = GR_Sin_or_Cos ;;
+}
+{ .mmf
+ nop.m 999
+(p7) ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
+(p6) fmerge.se FR_c = f0, f0
+}
+{ .mfi
+ nop.m 999
+(p6) fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// If |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
+// If |x| >= pi/4,
+// Create the right N for |x| < pi/4 and otherwise
+// Case 2: Place integer part of N in GP register
+//
+(p7) fcvt.xf FR_N_float = FR_N_fix
+ nop.i 999 ;;
+}
+{ .mmf
+ nop.m 999
+(p7) getf.sig GR_N_Inc = FR_N_fix
+(p8) fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Load 2**(-33), -2**(-33)
+//
+(p8) br.cond.spnt L(SINCOSL_SMALL_R) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+}
+//
+// if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
+//
+//
+// In this branch, |x| >= pi/4.
+//
+{ .mfi
+(p0) ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
+//
+// Load -2**(-67)
+//
+(p0) fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X
+//
+// w = N * P_2
+// s = -N * P_1 + Arg
+//
+(p0) add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_w = FR_N_float, FR_P_2, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Adjust N_fix by N_inc to determine whether sine or
+// cosine is being calculated
+//
+(p0) fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// Remember x >= pi/4.
+// Is s <= -2**(-33) or s >= 2**(-33) (p6)
+// or -2**(-33) < s < 2**(-33) (p7)
+(p6) fms.s1 FR_r = FR_s, f1, FR_w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_w = FR_N_float, FR_P_3, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fms.s1 FR_c = FR_s, f1, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For big s: r = s - w: No further reduction is necessary
+// For small s: w = N * P_3 (change sign) More reduction
+//
+(p6) fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fms.s1 FR_r = FR_s, f1, FR_U_1
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//
+// For big s: Is |r| < 2**(-3)?
+// For big s: c = S - r
+// For small s: U_1 = N * P_2 + w
+//
+// If p8 is set, prepare to branch to Small_R.
+// If p9 is set, prepare to branch to Normal_R.
+// For big s, r is complete here.
+//
+(p6) fms.s1 FR_c = FR_c, f1, FR_w
+//
+// For big s: c = c + w (w has not been negated.)
+// For small s: r = S - U_1
+//
+(p8) br.cond.spnt L(SINCOSL_SMALL_R) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+}
+{ .mfi
+(p7) add GR_Table_Base1 = 224, GR_Table_Base1
+//
+// Branch to SINCOSL_SMALL_R or SINCOSL_NORMAL_R
+//
+(p7) fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
+//
+// c = S - U_1
+// r = S_1 * r
+//
+//
+(p7) extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+}
+{ .mmi
+ nop.m 999
+//
+// Get [i_0,i_1] - two lsb of N_fix_gr.
+// Do dummy fmpy so inexact is always set.
+//
+(p7) cmp.eq.unc p9, p10 = 0x0, GR_i_1
+(p7) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+//
+// For small s: U_2 = N * P_2 - U_1
+// S_1 stored constant - grab the one stored with the
+// coefficients.
+//
+{ .mfi
+(p7) ldfe FR_S_1 = [GR_Table_Base1], 16
+//
+// Check if i_1 and i_0 != 0
+//
+(p10) fma.s1 FR_poly = f0, f1, FR_Neg_Two_to_M67
+(p7) cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fms.s1 FR_s = FR_s, f1, FR_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// S = S - r
+// U_2 = U_2 + w
+// load S_1
+//
+(p7) fma.s1 FR_rsq = FR_r, FR_r, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_U_2 = FR_U_2, f1, FR_w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fmerge.se FR_Input_X = FR_r, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_Input_X = f0, f1, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// FR_rsq = r * r
+// Save r as the result.
+//
+(p7) fms.s1 FR_c = FR_s, f1, FR_U_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if ( i_1 ==0) poly = c + S_1*r*r*r
+// else Result = 1
+//
+(p12) fnma.s1 FR_Input_X = FR_Input_X, f1, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 FR_r = FR_S_1, FR_r, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s0 FR_S_1 = FR_S_1, FR_S_1, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// If i_1 != 0, poly = 2**(-67)
+//
+(p7) fms.s1 FR_c = FR_c, f1, FR_U_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// c = c - U_2
+//
+(p9) fma.s1 FR_poly = FR_r, FR_rsq, FR_c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// i_0 != 0, so Result = -Result
+//
+(p11) fma.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p12) fms.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+//
+// if (i_0 == 0), Result = Result + poly
+// else Result = Result - poly
+//
+(p0) br.ret.sptk b0 ;;
+}
+L(SINCOSL_LARGER_ARG):
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
+ nop.i 999
+}
+;;
+
+// This path is for arguments with |Arg| >= 2**24 (below the 2**63 cutoff)
+// Adjust table_ptr1 to beginning of table.
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Point to 2**(-14)
+// N_0 = Arg * Inv_P_0
+//
+{ .mmi
+(p0) add GR_Table_Base = 688, GR_Table_Base ;;
+(p0) ldfs FR_Two_to_M14 = [GR_Table_Base], 4
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Load values 2**(-14) and -2**(-14)
+//
+(p0) fcvt.fx.s1 FR_N_0_fix = FR_N_0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N_0_fix = integer part of N_0
+//
+(p0) fcvt.xf FR_N_0 = FR_N_0_fix
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Make N_0 the integer part
+//
+(p0) fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_w = FR_N_0, FR_d_1, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Arg' = -N_0 * P_0 + Arg
+// w = N_0 * d_1
+//
+(p0) fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N = A' * 2/pi
+//
+(p0) fcvt.fx.s1 FR_N_fix = FR_N_float
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N_fix is the integer part
+//
+(p0) fcvt.xf FR_N_float = FR_N_fix
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) getf.sig GR_N_Inc = FR_N_fix
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+(p0) add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N is the integer part of the reduced-reduced argument.
+// Put the integer in a GP register
+//
+(p0) fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// s = -N*P_1 + Arg'
+// w = -N*P_2 + w
+// N_fix_gr = N_fix_gr + N_inc
+//
+(p0) fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For |s| > 2**(-14) r = S + w (r complete)
+// Else U_hi = N_0 * d_1
+//
+(p9) fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Either S <= -2**(-14) or S >= 2**(-14)
+// or -2**(-14) < s < 2**(-14)
+//
+(p8) fma.s1 FR_r = FR_s, f1, FR_w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_w = FR_N_float, FR_P_3, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// We need abs of both U_hi and V_hi - don't
+// worry about switched sign of V_hi.
+//
+(p9) fms.s1 FR_A = FR_U_hi, f1, FR_V_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Big s: finish up c = (S - r) + w (c complete)
+// Case 4: A = U_hi + V_hi
+// Note: Worry about switched sign of V_hi, so subtract instead of add.
+//
+(p9) fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi
+ nop.i 999 ;;
+}
+{ .mmf
+ nop.m 999
+ nop.m 999
+(p9) fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi
+}
+{ .mfi
+ nop.m 999
+(p9) fmerge.s FR_V_hiabs = f0, FR_V_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+// For big s: c = S - r
+// For small s do more work: U_lo = N_0 * d_1 - U_hi
+//
+(p9) fmerge.s FR_U_hiabs = f0, FR_U_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// For big s: Is |r| < 2**(-3)
+// For big s: if p12 set, prepare to branch to Small_R.
+// For big s: If p13 set, prepare to branch to Normal_R.
+//
+(p8) fms.s1 FR_c = FR_s, f1, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For small S: V_hi = N * P_2
+// w = N * P_3
+// Note the product does not include the (-) as in the writeup
+// so (-) missing for V_hi and w.
+//
+(p8) fcmp.lt.unc.s1 p12, p13 = FR_r, FR_Two_to_M3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fcmp.gt.s1 p12, p13 = FR_r, FR_Neg_Two_to_M3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_c = FR_c, f1, FR_w
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fms.s1 FR_w = FR_N_0, FR_d_2, FR_w
+(p12) br.cond.spnt L(SINCOSL_SMALL_R) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Big s: Vector off when |r| < 2**(-3). Recall that p8 will be true.
+// The remaining stuff is for Case 4.
+// Small s: V_lo = N * P_2 + U_hi (U_hi is in place of V_hi in writeup)
+// Note: the (-) is still missing for V_lo.
+// Small s: w = w + N_0 * d_2
+// Note: the (-) is now incorporated in w.
+//
+(p9) fcmp.ge.unc.s1 p10, p11 = FR_U_hiabs, FR_V_hiabs
+(p0) extr.u GR_i_1 = GR_N_Inc, 0, 1
+}
+{ .mfi
+ nop.m 999
+//
+// C_hi = S + A
+//
+(p9) fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
+(p0) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// t = U_lo + V_lo
+//
+//
+(p10) fms.s1 FR_a = FR_U_hi, f1, FR_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 FR_a = FR_V_hi, f1, FR_A
+ nop.i 999
+}
+;;
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+(p0) add GR_Table_Base = 528, GR_Table_Base
+//
+// Is U_hiabs >= V_hiabs?
+//
+(p9) fma.s1 FR_C_hi = FR_s, f1, FR_A
+ nop.i 999 ;;
+}
+{ .mmi
+(p0) ldfe FR_C_1 = [GR_Table_Base], 16 ;;
+(p0) ldfe FR_C_2 = [GR_Table_Base], 64
+ nop.i 999 ;;
+}
+//
+// c = c + C_lo finished.
+// Load C_2
+//
+{ .mfi
+(p0) ldfe FR_S_1 = [GR_Table_Base], 16
+//
+// C_lo = S - C_hi
+//
+(p0) fma.s1 FR_t = FR_t, f1, FR_w
+ nop.i 999 ;;
+}
+//
+// r and c have been computed.
+// Make sure ftz mode is set - should be automatic when using wre
+// |r| < 2**(-3)
+// Get [i_0,i_1] - two lsb of N_fix.
+// Load S_1
+//
+{ .mfi
+(p0) ldfe FR_S_2 = [GR_Table_Base], 64
+//
+// t = t + w
+//
+(p10) fms.s1 FR_a = FR_a, f1, FR_V_hi
+(p0) cmp.eq.unc p9, p10 = 0x0, GR_i_0 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For larger u than v: a = U_hi - A
+// Else a = V_hi - A (do an add to account for missing (-) on V_hi
+//
+(p0) fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fms.s1 FR_a = FR_U_hi, f1, FR_a
+(p0) cmp.eq.unc p11, p12 = 0x0, GR_i_1 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// If u > v: a = (U_hi - A) + V_hi
+// Else a = (V_hi - A) + U_hi
+// In each case account for negative missing from V_hi.
+//
+(p0) fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_lo = (S - C_hi) + A
+//
+(p0) fma.s1 FR_t = FR_t, f1, FR_a
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// t = t + a
+//
+(p0) fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// C_lo = C_lo + t
+// Adjust Table_Base to beginning of table
+//
+(p0) fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Load S_2
+//
+(p0) fma.s1 FR_rsq = FR_r, FR_r, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Table_Base points to C_1
+// r = C_hi + C_lo
+//
+(p0) fms.s1 FR_c = FR_C_hi, f1, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if i_1 ==0: poly = S_2 * FR_rsq + S_1
+// else poly = C_2 * FR_rsq + C_1
+//
+(p11) fma.s1 FR_Input_X = f0, f1, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 FR_Input_X = f0, f1, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Compute r_cube = FR_rsq * r
+//
+(p11) fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Compute FR_rsq = r * r
+// Is i_1 == 0 ?
+//
+(p0) fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// c = C_hi - r
+// Load C_1
+//
+(p0) fma.s1 FR_c = FR_c, f1, FR_C_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// if i_1 ==0: poly = r_cube * poly + c
+// else poly = FR_rsq * poly
+//
+(p10) fms.s1 FR_Input_X = f0, f1, FR_Input_X
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if i_1 ==0: Result = r
+// else Result = 1.0
+//
+(p11) fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if i_0 !=0: Result = -Result
+//
+(p9) fma.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p10) fms.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+//
+// if i_0 == 0: Result = Result + poly
+// else Result = Result - poly
+//
+(p0) br.ret.sptk b0 ;;
+}
+L(SINCOSL_SMALL_R):
+{ .mii
+ nop.m 999
+(p0) extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+//
+//
+// Compare both i_1 and i_0 with 0.
+// if i_1 == 0, set p9.
+// if i_0 == 0, set p11.
+//
+(p0) cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_rsq = FR_r, FR_r, f0
+(p0) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Z = Z * FR_rsq
+//
+(p10) fnma.s1 FR_c = FR_c, FR_r, f0
+(p0) cmp.eq.unc p11, p12 = 0x0, GR_i_0
+}
+;;
+
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+// |r| < 2**(-3)
+//
+// Set table_ptr1 to beginning of constant table.
+// Get [i_0,i_1] - two lsb of N_fix_gr.
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Set table_ptr1 to point to S_5.
+// Set table_ptr1 to point to C_5.
+// Compute FR_rsq = r * r
+//
+{ .mfi
+(p9) add GR_Table_Base = 672, GR_Table_Base
+(p10) fmerge.s FR_r = f1, f1
+(p10) add GR_Table_Base = 592, GR_Table_Base ;;
+}
+//
+// Set table_ptr1 to point to S_5.
+// Set table_ptr1 to point to C_5.
+//
+{ .mmi
+(p9) ldfe FR_S_5 = [GR_Table_Base], -16 ;;
+//
+// if (i_1 == 0) load S_5
+// if (i_1 != 0) load C_5
+//
+(p9) ldfe FR_S_4 = [GR_Table_Base], -16
+ nop.i 999 ;;
+}
+{ .mmf
+(p10) ldfe FR_C_5 = [GR_Table_Base], -16
+//
+// Z = FR_rsq * FR_rsq
+//
+(p9) ldfe FR_S_3 = [GR_Table_Base], -16
+//
+// Compute FR_rsq = r * r
+// if (i_1 == 0) load S_4
+// if (i_1 != 0) load C_4
+//
+(p0) fma.s1 FR_Z = FR_rsq, FR_rsq, f0 ;;
+}
+//
+// if (i_1 == 0) load S_3
+// if (i_1 != 0) load C_3
+//
+{ .mmi
+(p9) ldfe FR_S_2 = [GR_Table_Base], -16 ;;
+//
+// if (i_1 == 0) load S_2
+// if (i_1 != 0) load C_2
+//
+(p9) ldfe FR_S_1 = [GR_Table_Base], -16
+ nop.i 999
+}
+{ .mmi
+(p10) ldfe FR_C_4 = [GR_Table_Base], -16 ;;
+(p10) ldfe FR_C_3 = [GR_Table_Base], -16
+ nop.i 999 ;;
+}
+{ .mmi
+(p10) ldfe FR_C_2 = [GR_Table_Base], -16 ;;
+(p10) ldfe FR_C_1 = [GR_Table_Base], -16
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 != 0):
+// poly_lo = FR_rsq * C_5 + C_4
+// poly_hi = FR_rsq * C_2 + C_1
+//
+(p9) fma.s1 FR_Z = FR_Z, FR_r, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0) load S_1
+// if (i_1 != 0) load C_1
+//
+(p9) fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// c = -c * r
+// dummy fmpy's to flag inexact.
+//
+(p9) fma.s0 FR_S_4 = FR_S_4, FR_S_4, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = FR_rsq * poly_lo + C_3
+// poly_hi = FR_rsq * poly_hi
+//
+(p0) fma.s1 FR_Z = FR_Z, FR_rsq, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0):
+// poly_lo = FR_rsq * S_5 + S_4
+// poly_hi = FR_rsq * S_2 + S_1
+//
+(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0):
+// Z = Z * r for only one of the small r cases - not there
+// in original implementation notes.
+//
+(p9) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s0 FR_C_1 = FR_C_1, FR_C_1, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = FR_rsq * poly_lo + S_3
+// poly_hi = FR_rsq * poly_hi
+//
+(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 == 0): dummy fmpy's to flag inexact
+// r = 1
+//
+(p9) fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// poly_hi = r * poly_hi
+//
+(p0) fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fms.s1 FR_r = f0, f1, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_hi = Z * poly_lo + c
+// if i_0 == 1: r = -r
+//
+(p0) fma.s1 FR_poly = FR_poly, f1, FR_poly_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fms.s0 FR_Input_X = FR_r, f1, FR_poly
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//
+// poly = poly + poly_hi
+//
+(p11) fma.s0 FR_Input_X = FR_r, f1, FR_poly
+//
+// if (i_0 == 0) Result = r + poly
+// if (i_0 != 0) Result = r - poly
+//
+(p0) br.ret.sptk b0 ;;
+}
+L(SINCOSL_NORMAL_R):
+{ .mii
+ nop.m 999
+(p0) extr.u GR_i_1 = GR_N_Inc, 0, 1 ;;
+//
+// Set table_ptr1 and table_ptr2 to base address of
+// constant table.
+(p0) cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_rsq = FR_r, FR_r, f0
+(p0) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) frcpa.s1 FR_r_hi, p6 = f1, FR_r
+(p0) cmp.eq.unc p11, p12 = 0x0, GR_i_0
+}
+;;
+
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+//
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+// Get [i_0,i_1] - two lsb of N_fix_gr alone.
+//
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+(p10) add GR_Table_Base = 384, GR_Table_Base
+(p12) fms.s1 FR_Input_X = f0, f1, f1
+(p9) add GR_Table_Base = 224, GR_Table_Base ;;
+}
+{ .mfi
+(p10) ldfe FR_QQ_8 = [GR_Table_Base], 16
+//
+// if (i_1==0) poly = poly * FR_rsq + PP_1_lo
+// else poly = FR_rsq * poly
+//
+(p11) fma.s1 FR_Input_X = f0, f1, f1
+ nop.i 999 ;;
+}
+{ .mmb
+(p10) ldfe FR_QQ_7 = [GR_Table_Base], 16
+//
+// Adjust table pointers based on i_0
+// Compute rsq = r * r
+//
+(p9) ldfe FR_PP_8 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_r_cubed = FR_r, FR_rsq, f0
+ nop.i 999 ;;
+}
+{ .mmf
+(p9) ldfe FR_PP_7 = [GR_Table_Base], 16
+(p10) ldfe FR_QQ_6 = [GR_Table_Base], 16
+//
+// Load PP_8 and QQ_8; PP_7 and QQ_7
+//
+(p0) frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;
+}
+//
+// if (i_1==0) poly = PP_7 + FR_rsq * PP_8.
+// else poly = QQ_7 + FR_rsq * QQ_8.
+//
+{ .mmb
+(p9) ldfe FR_PP_6 = [GR_Table_Base], 16
+(p10) ldfe FR_QQ_5 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mmb
+(p9) ldfe FR_PP_5 = [GR_Table_Base], 16
+(p10) ldfe FR_S_1 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mmb
+(p10) ldfe FR_QQ_1 = [GR_Table_Base], 16
+(p9) ldfe FR_C_1 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mmb
+(p10) ldfe FR_QQ_4 = [GR_Table_Base], 16
+(p9) ldfe FR_PP_1 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mmb
+(p10) ldfe FR_QQ_3 = [GR_Table_Base], 16
+//
+// if (i_1=0) corr = corr + c*c
+// else corr = corr * c
+//
+(p9) ldfe FR_PP_4 = [GR_Table_Base], 16
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7
+ nop.i 999 ;;
+}
+//
+// if (i_1=0) poly = rsq * poly + PP_5
+// else poly = rsq * poly + QQ_5
+// Load PP_4 or QQ_4
+//
+{ .mmi
+(p9) ldfe FR_PP_3 = [GR_Table_Base], 16 ;;
+(p10) ldfe FR_QQ_2 = [GR_Table_Base], 16
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// r_hi = frcpa(frcpa(r)).
+// r_cube = r * FR_rsq.
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7
+ nop.i 999 ;;
+}
+//
+// Do dummy multiplies so inexact is always set.
+//
+{ .mfi
+(p9) ldfe FR_PP_2 = [GR_Table_Base], 16
+//
+// r_lo = r - r_hi
+//
+(p9) fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0
+ nop.i 999 ;;
+}
+{ .mbb
+(p9) ldfe FR_PP_1_lo = [GR_Table_Base], 16
+ nop.b 999
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) U_lo = r_hi * r_hi
+// else U_lo = r_hi + r
+//
+(p9) fma.s1 FR_corr = FR_C_1, FR_rsq, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) corr = C_1 * rsq
+// else corr = S_1 * r_cubed + r
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) U_hi = r_hi + U_hi
+// else U_hi = QQ_1 * U_hi + 1
+//
+(p9) fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// U_hi = r_hi * r_hi
+//
+(p0) fms.s1 FR_r_lo = FR_r, f1, FR_r_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Load PP_1, PP_6, PP_5, and C_1
+// Load QQ_1, QQ_6, QQ_5, and S_1
+//
+(p0) fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) U_lo = r * r_hi + U_lo
+// else U_lo = r_lo * U_lo
+//
+(p9) fma.s1 FR_corr = FR_corr, FR_c, FR_c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 =0) U_hi = r + U_hi
+// if (i_1 =0) U_lo = r_lo * U_lo
+//
+//
+(p9) fma.s0 FR_PP_5 = FR_PP_5, FR_PP_4, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1=0) poly = poly * rsq + PP_6
+// else poly = poly * rsq + QQ_6
+//
+(p9) fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s0 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1!=0) U_hi = PP_1 * U_hi
+// if (i_1!=0) U_lo = r * r + U_lo
+// Load PP_3 or QQ_3
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Load PP_2, QQ_2
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) poly = FR_rsq * poly + PP_3
+// else poly = FR_rsq * poly + QQ_3
+// Load PP_1_lo
+//
+(p9) fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1 =0) poly = poly * rsq + pp_r4
+// else poly = poly * rsq + qq_r4
+//
+(p9) fma.s1 FR_U_hi = FR_r, f1, FR_U_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) U_lo = PP_1_hi * U_lo
+// else U_lo = QQ_1 * U_lo
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_0==0) Result = 1
+// else Result = -1
+//
+(p0) fma.s1 FR_V = FR_U_lo, f1, FR_corr
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) poly = FR_rsq * poly + PP_2
+// else poly = FR_rsq * poly + QQ_2
+//
+(p9) fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// V = U_lo + corr
+//
+(p9) fma.s1 FR_poly = FR_r_cubed, FR_poly, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if (i_1==0) poly = r_cube * poly
+// else poly = FR_rsq * poly
+//
+(p0) fma.s1 FR_V = FR_poly, f1, FR_V
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fms.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//
+// V = V + poly
+//
+(p11) fma.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+//
+// if (i_0==0) Result = Result * U_hi + V
+// else Result = Result * U_hi - V
+//
+(p0) br.ret.sptk b0
+};;
+
+//
+// If cosine, FR_Input_X = 1
+// If sine, FR_Input_X = +/-Zero (Input FR_Input_X)
+// Results are exact, no exceptions
+//
+
+L(SINCOSL_ZERO):
+{ .mbb
+(p0) cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
+ nop.b 999
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fmerge.s FR_Input_X = FR_Input_X, FR_Input_X
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p6) fmerge.s FR_Input_X = f1, f1
+(p0) br.ret.sptk b0 ;;
+}
+L(SINCOSL_SPECIAL):
+{ .mfb
+ nop.m 999
+//
+// Path for Arg = +/- QNaN, SNaN, Inf
+// Invalid can be raised. SNaNs
+// become QNaNs
+//
+(p0) fmpy.s0 FR_Input_X = FR_Input_X, f0
+(p0) br.ret.sptk b0 ;;
+}
+.endp cosl#
+ASM_SIZE_DIRECTIVE(cosl#)
+
+// Call __libm_pi_by_2_reduce (noted here as int pi_by_2_reduce(double* x,
+// double *y)) for |arguments| >= 2**63. NOTE(review): slots are written with
+// stfe, not as doubles -- confirm the real prototype of the callee.
+// Layout once the 64-byte frame exists (sp = new stack pointer):
+// sp+48 -> f0 (c)
+// r45 -> sp+32 -> f0 (r)
+// r44 -> sp+16 -> InputX (x)
+//
+
+.proc __libm_callout
+__libm_callout:
+L(SINCOSL_ARG_TOO_LARGE):
+.prologue
+{ .mfi
+ add r45=-32,sp // Parameter: r address (old sp-32 == new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack (64-byte frame)
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [r45] = f0,16 // Clear Parameter r on stack; r45 advances to the c slot
+ add r44 = 16,sp // Parameter x address (new sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [r45] = f0,-16 // Clear Parameter c on stack; r45 steps back to the r slot
+ nop.i 0
+ nop.b 0
+}
+{ .mib
+ stfe [r44] = FR_Input_X // Store Parameter x on stack
+ nop.i 0
+(p0) br.call.sptk b0=__libm_pi_by_2_reduce# ;;
+};;
+{ .mii
+(p0) ldfe FR_Input_X =[r44],16
+//
+// Get X off the stack (load above); readjust Table ptr by -16 (adds below)
+//
+(p0) adds GR_Table_Base1 = -16, GR_Table_Base1
+//
+// N_Inc = Sin_or_Cos + r8 (r8 presumably holds the callee return value)
+//
+(p0) add GR_N_Inc = GR_Sin_or_Cos,r8 ;;
+}
+{ .mmb
+(p0) ldfe FR_r =[r45],16
+//
+// Get r off the stack (load above); r45 now addresses the c slot.
+// NOTE(review): r44/r45 are reused after the call -- confirm callee keeps them
+//
+(p0) ldfs FR_Two_to_M3 = [GR_Table_Base1],4
+ nop.b 999 ;;
+}
+{ .mmb
+(p0) ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
+(p0) ldfe FR_c =[r45] // Get c off the stack
+ nop.b 999 ;;
+}
+{ .mfi
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+(p0) fcmp.lt.unc.s1 p6, p0 = FR_r, FR_Two_to_M3 // p6 = (r < Two_to_M3)
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ nop.b 0
+};;
+{ .mfi
+ nop.m 999
+(p6) fcmp.gt.unc.s1 p6, p0 = FR_r, FR_Neg_Two_to_M3 // p6 = p6 && (r > Neg_Two_to_M3)
+ nop.i 999 ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(SINCOSL_SMALL_R) ;; // r strictly between the two bounds
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p0) br.cond.sptk L(SINCOSL_NORMAL_R) ;; // every other r
+}
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+.type __libm_pi_by_2_reduce#,@function
+.global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_expm1.S b/sysdeps/ia64/fpu/s_expm1.S
new file mode 100644
index 0000000..840b1c0
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_expm1.S
@@ -0,0 +1,1755 @@
+.file "exp_m1.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// HISTORY
+// 2/02/00 Initial Version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: Combined exp(x) and expm1(x), where
+//
+// exp(x) = e^x, for double precision x values
+//
+// expm1(x) = e^x - 1, for double precision x values
+//
+// *********************************************************************
+//
+// Accuracy: Within .7 ulps for 80-bit floating point values
+// Very accurate for double precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9,f32-f61, f99-f102
+//
+// General Purpose Registers:
+// r32-r61
+// r62-r65 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions raised when appropriate for exp and expm1
+// Underflow exceptions raised when appropriate for exp and expm1
+// (Error Handling Routine called for overflow and Underflow)
+// Inexact raised when appropriate by algorithm
+//
+// exp(inf) = inf
+// exp(-inf) = +0
+// exp(SNaN) = QNaN
+// exp(QNaN) = QNaN
+// exp(0) = 1
+// exp(EM_special Values) = QNaN
+// expm1(inf) = inf
+// expm1(-inf) = -1
+// expm1(SNaN) = QNaN
+// expm1(QNaN) = QNaN
+// expm1(0) = 0
+// expm1(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Implementation and Algorithm Notes:
+//
+// ker_exp_64( in_FR : X,
+// in_GR : Flag,
+// in_GR : Expo_Range
+// out_FR : Y_hi,
+// out_FR : Y_lo,
+// out_FR : scale,
+// out_PR : Safe )
+//
+// On input, X is in register format and
+// Flag = 0 for exp,
+// Flag = 1 for expm1,
+//
+// On output, provided X and X_cor are real numbers, then
+//
+// scale*(Y_hi + Y_lo) approximates exp(X) if Flag is 0
+// scale*(Y_hi + Y_lo) approximates exp(X)-1 if Flag is 1
+//
+// The accuracy is sufficient for a highly accurate 64 sig.
+// bit implementation. Safe is set if there is no danger of
+// overflow/underflow when the result is composed from scale,
+// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
+// Otherwise, one must prepare to handle the possible exception
+// appropriately. Note that SAFE not set (false) does not mean
+// that overflow/underflow will occur; only the setting of SAFE
+// guarantees the opposite.
+//
+// **** High Level Overview ****
+//
+// The method consists of three cases.
+//
+// If |X| < Tiny use case exp_tiny;
+// else if |X| < 2^(-6) use case exp_small;
+// else use case exp_regular;
+//
+// Case exp_tiny:
+//
+// 1 + X can be used to approximate exp(X) or exp(X+X_cor);
+// X + X^2/2 can be used to approximate exp(X) - 1
+//
+// Case exp_small:
+//
+// Here, exp(X), exp(X+X_cor), and exp(X) - 1 can all be
+// approximated by a relatively simple polynomial.
+//
+// This polynomial resembles the truncated Taylor series
+//
+// exp(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+//
+// Case exp_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute exp(X), we accurately decompose X into
+//
+// X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13.
+//
+// Hence
+//
+// exp(X) = 2^( N / 2^12 ) * exp(r).
+//
+// The value 2^( N / 2^12 ) is obtained by simple combinations
+// of values calculated beforehand and stored in table; exp(r)
+// is approximated by a short polynomial because |r| is small.
+//
+// We elaborate this method in 4 steps.
+//
+// Step 1: Reduction
+//
+// The value 2^12/log(2) is stored as a double-extended number
+// L_Inv.
+//
+// N := round_to_nearest_integer( X * L_Inv )
+//
+// The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
+// that r can be computed accurately via
+//
+// r := (X - N*L_hi) - N*L_lo
+//
+// We pick L_hi such that N*L_hi is representable in 64 sig. bits
+// and thus the FMA X - N*L_hi is error free. So r is within
+// 1 rounding error of an exact reduction with respect to
+//
+// L_hi + L_lo.
+//
+// In particular, L_hi has 30 significant bits and can be stored
+// as a double-precision number; L_lo has 64 significant bits and
+// stored as a double-extended number.
+//
+// In the case Flag = 2, we further modify r by
+//
+// r := r + X_cor.
+//
+// Step 2: Approximation
+//
+// exp(r) - 1 is approximated by a short polynomial of the form
+//
+// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
+//
+// Step 3: Composition from Table Values
+//
+// The value 2^( N / 2^12 ) can be composed from a couple of tables
+// of precalculated values. First, express N as three integers
+// K, M_1, and M_2 as
+//
+// N = K * 2^12 + M_1 * 2^6 + M_2
+//
+// Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
+// When N is represented in 2's complement, M_2 is simply the 6
+// lsb's, M_1 is the next 6, and K is simply N shifted right
+// arithmetically (sign extended) by 12 bits.
+//
+// Now, 2^( N / 2^12 ) is simply
+//
+// 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
+//
+// Clearly, 2^K needs no tabulation. The other two values are less
+// trivial because if we store each accurately to more than working
+// precision, then their product is too expensive to calculate. We
+// use the following method.
+//
+// Define two mathematical values, delta_1 and delta_2, implicitly
+// such that
+//
+// T_1 = exp( [M_1 log(2)/2^6] - delta_1 )
+// T_2 = exp( [M_2 log(2)/2^12] - delta_2 )
+//
+// are representable as 24 significant bits. To illustrate the idea,
+// we show how we define delta_1:
+//
+// T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) )
+// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
+//
+// The last equality means mathematical equality. We then tabulate
+//
+// W_1 := exp(delta_1) - 1
+// W_2 := exp(delta_2) - 1
+//
+// Both in double precision.
+//
+// From the tabulated values T_1, T_2, W_1, W_2, we compose the values
+// T and W via
+//
+// T := T_1 * T_2 ...exactly
+// W := W_1 + (1 + W_1)*W_2
+//
+// W approximates exp( delta ) - 1 where delta = delta_1 + delta_2.
+// The mathematical product of T and (W+1) is an accurate representation
+// of 2^(M_1/2^6) * 2^(M_2/2^12).
+//
+// Step 4. Reconstruction
+//
+// Finally, we can reconstruct exp(X), exp(X) - 1.
+// Because
+//
+// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
+// + (M_2*log(2)/2^12 - delta_2)
+// + delta_1 + delta_2 + r ...accurately
+// We have
+//
+// exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] )
+// ~=~ 2^K * ( T + T*[exp(delta + r) - 1] )
+// ~=~ 2^K * ( T + T*[(exp(delta)-1)
+// + exp(delta)*(exp(r)-1)] )
+// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
+// ~=~ 2^K * ( Y_hi + Y_lo )
+//
+// where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r))
+//
+// For exp(X)-1, we have
+//
+// exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
+//
+// and we combine Y_hi + Y_lo - 2^(-K) into the form of two
+// numbers Y_hi + Y_lo carefully.
+//
+// **** Algorithm Details ****
+//
+// A careful algorithm must be used to realize the mathematical ideas
+// accurately. We describe each of the three cases. We assume SAFE
+// is preset to be TRUE.
+//
+// Case exp_tiny:
+//
+// The important points are to ensure an accurate result under
+// different rounding directions and a correct setting of the SAFE
+// flag.
+//
+// If Flag is 1, then
+// SAFE := False ...possibility of underflow
+// Scale := 1.0
+// Y_hi := X
+// Y_lo := 2^(-17000)
+// Else
+// Scale := 1.0
+// Y_hi := 1.0
+// Y_lo := X ...for different rounding modes
+// Endif
+//
+// Case exp_small:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into several portions.
+//
+// Let r = X
+//
+// If Flag is not 1 ...i.e. exp( argument )
+//
+// rsq := r * r;
+// r4 := rsq*rsq
+// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
+// poly_hi := r + rsq*(P_1 + r*P_2)
+// Y_lo := poly_hi + r4 * poly_lo
+// set lsb(Y_lo) to 1
+// Y_hi := 1.0
+// Scale := 1.0
+//
+// Else ...i.e. exp( argument ) - 1
+//
+// rsq := r * r
+// r4 := rsq * rsq
+// r6 := rsq * r4
+// poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
+// poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
+// Y_lo := rsq*poly_hi + poly_lo
+// set lsb(Y_lo) to 1
+// Y_hi := X
+// Scale := 1.0
+//
+// Endif
+//
+// Case exp_regular:
+//
+// The previous description contains enough information except for the
+// computation of poly and the final Y_hi and Y_lo in the case for
+// exp(X)-1.
+//
+// The computation of poly for Step 2:
+//
+// rsq := r*r
+// poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
+//
+// For the case exp(X) - 1, we need to incorporate 2^(-K) into
+// Y_hi and Y_lo at the end of Step 4.
+//
+// If K > 10 then
+// Y_lo := Y_lo - 2^(-K)
+// Else
+// If K < -10 then
+// Y_lo := Y_hi + Y_lo
+// Y_hi := -2^(-K)
+// Else
+// Y_hi := Y_hi - 2^(-K)
+// End If
+// End If
+//
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r59 // save slot for ar.pfs, going by the name (use not in this excerpt)
+GR_SAVE_B0 = r60 // save slot for b0, going by the name (use not in this excerpt)
+GR_SAVE_GP = r61 // save slot for gp; r61 is the last local of alloc ...,0,30,4,0
+
+GR_Parameter_X = r62 // r62-r64 fall in the 4 output registers of the alloc;
+GR_Parameter_Y = r63 // the file header reserves r62-r65 for passing arguments
+GR_Parameter_RESULT = r64 // to the error handling routine
+
+FR_X = f9 // f9 receives fnorm.s1 of the input f8 at function entry
+FR_Y = f1 // NOTE(review): f1 is the architectural constant 1.0 -- confirm intent
+FR_RESULT = f99 // use is outside this excerpt
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 64
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 // Inv_L: 2^12/log(2) per the header notes
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi: leading part of log(2)/2^12
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo: trailing part of log(2)/2^12
+// Three 16-byte entries in the order Inv_L, L_hi, L_lo; expm1 reads them with ldfe ...,16
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64
+Constants_exp_64_Exponents:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF // 16-byte rows read as two ld8 values (low word first, assuming little-endian)
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF // row picked by shladd r32,4 with r32 = 1: upper and lower bounds tested against K
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF // value at byte offset 0x48 (-60): EXP_SMALL goes to EXP_VERY_SMALL when expo_X is below it
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64
+Constants_exp_64_A:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A_3 (loaded first, into f51)
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A_2 (loaded second, into f49)
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A_1 (loaded third, into f40)
+// Stored highest coefficient first for poly = r + rsq*(A_1 + r*(A_2 + r*A_3))
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64
+Constants_exp_64_P:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000 // P_6 (f52 on the p12 path)
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000 // P_5 (f53)
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000 // P_4 (f54)
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000 // P_3 (f55)
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000 // P_2 (f56)
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000 // P_1 (f57)
+// Stored highest coefficient first; EXP_SMALL reads them in this order under p12
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64
+Constants_exp_64_Q:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
+data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000 // Q_7 (f51 on the p13 path)
+data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000 // Q_6 (f52)
+data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000 // Q_5 (f53)
+data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000 // Q_4 (f54)
+data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000 // Q_3 (f55)
+data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000 // Q_2 (f56)
+data4 0x00000000,0x80000000,0x00003FFE,0x00000000 // Q_1 (f57)
+// Stored highest coefficient first; EXP_SMALL reads them in this order under p13
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
+
+.align 64
+Constants_exp_64_T1: // 64 four-byte entries read with ldfs at index M_1 = bits 6..11 of N
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64
+Constants_exp_64_T2: // 64 four-byte entries read with ldfs at index M_2 = bits 0..5 of N
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+
+.align 64
+Constants_exp_64_W1: // 64 eight-byte entries (two data4 words each) read with ldfd at index M_1
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64
+Constants_exp_64_W2: // 64 eight-byte entries (two data4 words each) read with ldfd at index M_2
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+
+.section .text
+.proc expm1#
+.global expm1#
+.align 64
+
+expm1:
+#ifdef _LIBC
+.global __expm1#
+__expm1:
+#endif
+
+
+{ .mii
+ alloc r32 = ar.pfs,0,30,4,0
+(p0) add r33 = 1, r0
+(p0) cmp.eq.unc p7, p0 = r0, r0
+}
+;;
+
+
+//
+// Set p7 true for expm1
+// Set Flag = r33 = 1 for expm1
+// These are really no longer necessary, but are a remnant
+// from when this file had multiple entry points.
+// They should be carefully removed
+
+
+
+{ .mfi
+(p0) add r32 = 1,r0
+(p0) fnorm.s1 f9 = f8
+ nop.i 999
+}
+
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p8 = f8, 0x1E7
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p9, p0 = f8, 0x1FF
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) mov f36 = f1
+ nop.i 999 ;;
+}
+
+//
+// Identify NatVals, NaNs, Infs, and Zeros.
+// Identify EM unsupporteds.
+// Save special input registers
+//
+// Create FR_X_cor = 0.0
+// GR_Flag = 0
+// GR_Expo_Range = 1
+// FR_Scale = 1.0
+//
+
+{ .mfb
+ nop.m 999
+(p0) mov f32 = f0
+(p6) br.cond.spnt EXP_64_SPECIAL ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt EXP_64_UNSUPPORTED ;;
+}
+
+//
+// Branch out for special input values
+//
+
+{ .mfi
+(p0) cmp.ne.unc p12, p13 = 0x01, r33
+(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0
+(p0) cmp.eq.unc p15, p0 = r0, r0
+}
+
+//
+// Raise possible denormal operand exception
+// Normalize x
+//
+// This function computes exp( x + x_cor)
+// Input FR 1: FR_X
+// Input FR 2: FR_X_cor
+// Input GR 1: GR_Flag
+// Input GR 2: GR_Expo_Range
+// Output FR 3: FR_Y_hi
+// Output FR 4: FR_Y_lo
+// Output FR 5: FR_Scale
+// Output PR 1: PR_Safe
+
+//
+// Prepare to load constants
+// Set Safe = True
+//
+
+{ .mmi
+(p0) addl r34 = @ltoff(Constants_exp_64_Arg#), gp
+(p0) addl r40 = @ltoff(Constants_exp_64_W1#), gp
+(p0) addl r41 = @ltoff(Constants_exp_64_W2#), gp
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+ ld8 r40 = [r40]
+(p0) addl r50 = @ltoff(Constants_exp_64_T1#), gp
+}
+;;
+
+
+{ .mmi
+ ld8 r41 = [r41]
+(p0) ldfe f37 = [r34],16
+(p0) addl r51 = @ltoff(Constants_exp_64_T2#), gp
+}
+;;
+
+//
+// N = fcvt.fx(float_N)
+// Set p14 if -6 > expo_X
+//
+
+
+//
+// Bias = 0x0FFFF
+// expo_X = expo_X and Mask
+//
+
+//
+// Load L_lo
+// Set p10 if 14 < expo_X
+//
+
+{ .mmi
+ ld8 r50 = [r50]
+(p0) ldfe f40 = [r34],16
+ nop.i 999
+}
+;;
+
+{ .mlx
+ nop.m 999
+(p0) movl r58 = 0x0FFFF
+}
+;;
+
+//
+// Load W2_ptr
+// Branch to SMALL if expo_X < -6
+//
+
+//
+// float_N = X * L_Inv
+// expo_X = exponent of X
+// Mask = 0x1FFFF
+//
+
+{ .mmi
+ ld8 r51 = [r51]
+(p0) ldfe f41 = [r34],16
+}
+;;
+
+{ .mlx
+(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
+(p0) movl r39 = 0x1FFFF
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+(p0) getf.exp r37 = f9
+ nop.i 999
+}
+;;
+
+{ .mii
+ nop.m 999
+ nop.i 999
+(p0) and r37 = r37, r39 ;;
+}
+
+{ .mmi
+(p0) sub r37 = r37, r58 ;;
+(p0) cmp.gt.unc p14, p0 = -6, r37
+(p0) cmp.lt.unc p10, p0 = 14, r37 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load L_inv
+// Set p12 true for Flag = 0 (exp)
+// Set p13 true for Flag = 1 (expm1)
+//
+(p0) fmpy.s1 f38 = f9, f37
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Load L_hi
+// expo_X = expo_X - Bias
+// get W1_ptr
+//
+(p0) fcvt.fx.s1 f39 = f38
+(p14) br.cond.spnt EXP_SMALL ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt EXP_HUGE ;;
+}
+
+{ .mmi
+(p0) shladd r34 = r32,4,r34
+(p0) addl r35 = @ltoff(Constants_exp_64_A#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r35 = [r35]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+//
+// Load T_1,T_2
+//
+
+{ .mmb
+(p0) ldfe f51 = [r35],16
+(p0) ld8 r45 = [r34],8
+ nop.b 999 ;;
+}
+//
+// Set Safe = True if k >= big_expo_neg
+// Set Safe = False if k < big_expo_neg
+//
+
+{ .mmb
+(p0) ldfe f49 = [r35],16
+(p0) ld8 r48 = [r34],0
+ nop.b 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Branch to HUGE if expo_X > 14
+//
+(p0) fcvt.xf f38 = f39
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r52 = f39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) extr.u r43 = r52, 6, 6 ;;
+//
+// r = r - float_N * L_lo
+// K = extr(N_fix,12,52)
+//
+(p0) shladd r40 = r43,3,r40 ;;
+}
+
+{ .mfi
+(p0) shladd r50 = r43,2,r50
+(p0) fnma.s1 f42 = f40, f38, f9
+//
+// float_N = float(N)
+// N_fix = significand N
+//
+(p0) extr.u r42 = r52, 0, 6
+}
+
+{ .mmi
+(p0) ldfd f43 = [r40],0 ;;
+(p0) shladd r41 = r42,3,r41
+(p0) shladd r51 = r42,2,r51
+}
+//
+// W_1_p1 = 1 + W_1
+//
+
+{ .mmi
+(p0) ldfs f44 = [r50],0 ;;
+(p0) ldfd f45 = [r41],0
+//
+// M_2 = extr(N_fix,0,6)
+// M_1 = extr(N_fix,6,6)
+// r = X - float_N * L_hi
+//
+(p0) extr r44 = r52, 12, 52
+}
+
+{ .mmi
+(p0) ldfs f46 = [r51],0 ;;
+(p0) sub r46 = r58, r44
+(p0) cmp.gt.unc p8, p15 = r44, r45
+}
+//
+// W = W_1 + W_1_p1*W_2
+// Load A_2
+// Bias_m_K = Bias - K
+//
+
+{ .mii
+(p0) ldfe f40 = [r35],16
+//
+// load A_1
+// poly = A_2 + r*A_3
+// rsq = r * r
+// neg_2_mK = exponent of Bias_m_k
+//
+(p0) add r47 = r58, r44 ;;
+//
+// Set Safe = True if k <= big_expo_pos
+// Set Safe = False if k > big_expo_pos
+// Load A_3
+//
+(p15) cmp.lt p8,p15 = r44,r48 ;;
+}
+
+{ .mmf
+(p0) setf.exp f61 = r46
+//
+// Bias_p + K = Bias + K
+// T = T_1 * T_2
+//
+(p0) setf.exp f36 = r47
+(p0) fnma.s1 f42 = f41, f38, f42 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load W_1,W_2
+// Load big_exp_pos, load big_exp_neg
+//
+(p0) fadd.s1 f47 = f43, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f52 = f42, f51, f49
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 f48 = f42, f42
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 f53 = f44, f46
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f54 = f45, f47, f43
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fneg f61 = f61
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f52 = f42, f52, f40
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 f55 = f54, f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// W + Wp1 * poly
+//
+(p0) mov f34 = f53
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// A_1 + r * poly
+// Scale = setf_exp(Bias_p_k)
+//
+(p0) fma.s1 f52 = f48, f52, f42
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly = r + rsq(A_1 + r*poly)
+// Wp1 = 1 + W
+// neg_2_mK = -neg_2_mK
+//
+(p0) fma.s1 f35 = f55, f52, f54
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.s1 f35 = f35, f53
+//
+// Y_hi = T
+// Y_lo = T * (W + Wp1*poly)
+//
+(p12) br.cond.sptk EXP_MAIN ;;
+}
+//
+// Branch if exp(x)
+// Continue for exp(x)-1
+//
+
+{ .mii
+(p0) cmp.lt.unc p12, p13 = 10, r44
+ nop.i 999 ;;
+//
+// Set p12 if 10 < K, Else p13
+//
+(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
+}
+//
+// K > 10: Y_lo = Y_lo + neg_2_mK
+// K <=10: Set p13 if -10 > K, Else set p14
+//
+
+{ .mfi
+(p13) cmp.eq p15, p0 = r0, r0
+(p14) fadd.s1 f34 = f61, f34
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fadd.s1 f35 = f35, f61
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 f35 = f35, f34
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// K <= 10 and K < -10, Set Safe = True
+// K <= 10 and K < -10, Y_lo = Y_hi + Y_lo
+// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
+//
+(p13) mov f34 = f61
+(p0) br.cond.sptk EXP_MAIN ;;
+}
+EXP_SMALL:
+
+{ .mmi
+(p12) addl r35 = @ltoff(Constants_exp_64_P#), gp
+(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p12) ld8 r35 = [r35]
+ ld8 r34 = [r34]
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p13) addl r35 = @ltoff(Constants_exp_64_Q#), gp
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Return
+// K <= 10 and K < -10, Y_hi = neg_2_mk
+//
+// /*******************************************************/
+// /*********** Branch EXP_SMALL *************************/
+// /*******************************************************/
+
+{ .mfi
+(p13) ld8 r35 = [r35]
+(p0) mov f42 = f9
+(p0) add r34 = 0x48,r34
+}
+;;
+
+//
+// Flag = 0
+// r4 = rsq * rsq
+//
+
+{ .mfi
+(p0) ld8 r49 =[r34],0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Flag = 1
+//
+(p0) cmp.lt.unc p14, p0 = r37, r49 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// r = X
+//
+(p0) fmpy.s1 f48 = f42, f42
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// rsq = r * r
+//
+(p0) fmpy.s1 f50 = f48, f48
+//
+// Is input very small?
+//
+(p14) br.cond.spnt EXP_VERY_SMALL ;;
+}
+//
+// Flag_not1: Y_hi = 1.0
+// Flag is 1: r6 = rsq * r4
+//
+
+{ .mfi
+(p12) ldfe f52 = [r35],16
+(p12) mov f34 = f1
+(p0) add r53 = 0x1,r0 ;;
+}
+
+{ .mfi
+(p13) ldfe f51 = [r35],16
+//
+// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
+//
+(p13) mov f34 = f9
+ nop.i 999 ;;
+}
+
+{ .mmf
+(p12) ldfe f53 = [r35],16
+//
+// For Flag_not_1, Y_hi = X
+// Scale = 1
+// Create 0x000...01
+//
+(p0) setf.sig f37 = r53
+(p0) mov f36 = f1 ;;
+}
+
+{ .mmi
+(p13) ldfe f52 = [r35],16 ;;
+(p12) ldfe f54 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p13) ldfe f53 = [r35],16
+(p13) fmpy.s1 f58 = f48, f50
+ nop.i 999 ;;
+}
+//
+// Flag_not1: poly_lo = P_5 + r*P_6
+// Flag_1: poly_lo = Q_6 + r*Q_7
+//
+
+{ .mmi
+(p13) ldfe f54 = [r35],16 ;;
+(p12) ldfe f55 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f56 = [r35],16 ;;
+(p13) ldfe f55 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f57 = [r35],0 ;;
+(p13) ldfe f56 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p13) ldfe f57 = [r35],0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For Flag_not_1, load p5,p6,p1,p2
+// Else load p5,p6,p1,p2
+//
+(p12) fma.s1 f60 = f52, f42, f53
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f60 = f51, f42, f52
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f60 = f60, f42, f54
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f59 = f56, f42, f57
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f60 = f42, f60, f53
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f59 = f59, f48, f42
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7)
+// Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
+// Flag_not1: poly_hi = (P_1 + r*P_2)
+//
+(p13) fmpy.s1 f60 = f60, f58
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f60 = f60, f42, f55
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_1: poly_lo = r6 *(Q_5 + ....)
+// Flag_not1: poly_hi = r + rsq *(P_1 + r*P_2)
+//
+(p12) fma.s1 f35 = f60, f50, f59
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f59 = f54, f42, f55
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_not1: Y_lo = rsq* poly_hi + poly_lo
+// Flag_1: poly_lo = rsq* poly_hi + poly_lo
+//
+(p13) fma.s1 f59 = f59, f42, f56
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_not_1: (P_1 + r*P_2)
+//
+(p13) fma.s1 f59 = f59, f42, f57
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2)
+//
+(p13) fma.s1 f35 = f59, f48, f60
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Create 0.000...01
+//
+(p0) for f37 = f35, f37
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Set lsb of Y_lo to 1
+//
+(p0) fmerge.se f35 = f35,f37
+(p0) br.cond.sptk EXP_MAIN ;;
+}
+EXP_VERY_SMALL:
+
+{ .mmi
+ nop.m 999
+(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
+ nop.i 999;;
+}
+
+{ .mfi
+(p13) ld8 r34 = [r34];
+(p12) mov f35 = f9
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) mov f34 = f1
+(p12) br.cond.sptk EXP_MAIN ;;
+}
+
+{ .mlx
+(p13) add r34 = 8,r34
+(p13) movl r39 = 0x0FFFE ;;
+}
+//
+// Load big_exp_neg
+// Create 1/2's exponent
+//
+
+{ .mii
+(p13) setf.exp f56 = r39
+(p13) shladd r34 = r32,4,r34 ;;
+ nop.i 999
+}
+//
+// Negative exponents are stored after positive
+//
+
+{ .mfi
+(p13) ld8 r45 = [r34],0
+//
+// Y_hi = x
+// Scale = 1
+//
+(p13) fmpy.s1 f35 = f9, f9
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Reset Safe if necessary
+// Create 1/2
+//
+(p13) mov f34 = f9
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p13) cmp.lt.unc p0, p15 = r37, r45
+(p13) mov f36 = f1
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Y_lo = x * x
+//
+(p13) fmpy.s1 f35 = f35, f56
+//
+// Y_lo = x*x/2
+//
+(p13) br.cond.sptk EXP_MAIN ;;
+}
+EXP_HUGE:
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.gt.unc.s1 p14, p0 = f9, f0
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl r39 = 0x15DC0 ;;
+}
+
+{ .mfi
+(p14) setf.exp f34 = r39
+(p14) mov f35 = f1
+(p14) cmp.eq p0, p15 = r0, r0 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p14) mov f36 = f34
+//
+// If x > 0, Set Safe = False
+// If x > 0, Y_hi = 2**(24,000)
+// If x > 0, Y_lo = 1.0
+// If x > 0, Scale = 2**(24,000)
+//
+(p14) br.cond.sptk EXP_MAIN ;;
+}
+
+{ .mlx
+ nop.m 999
+(p12) movl r39 = 0xA240
+}
+
+{ .mlx
+ nop.m 999
+(p12) movl r38 = 0xA1DC ;;
+}
+
+{ .mmb
+(p13) cmp.eq p15, p14 = r0, r0
+(p12) setf.exp f34 = r39
+ nop.b 999 ;;
+}
+
+{ .mlx
+(p12) setf.exp f35 = r38
+(p13) movl r39 = 0xFF9C
+}
+
+{ .mfi
+ nop.m 999
+(p13) fsub.s1 f34 = f0, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) mov f36 = f34
+(p12) cmp.eq p0, p15 = r0, r0 ;;
+}
+
+{ .mfi
+(p13) setf.exp f35 = r39
+(p13) mov f36 = f1
+ nop.i 999 ;;
+}
+EXP_MAIN:
+
+{ .mfi
+(p0) cmp.ne.unc p12, p0 = 0x01, r33
+(p0) fmpy.s1 f101 = f36, f35
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.d.s0 f99 = f34, f36, f101
+(p15) br.cond.sptk EXP_64_RETURN;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x01
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl r50 = 0x000000000103FF ;;
+}
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + RZ + TD (Underflows)
+//
+//
+// If (Safe) is true, then
+// Compute result using user supplied status field.
+// No overflow or underflow here, but perhaps inexact.
+// Return
+// Else
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE single, double,
+// double extended
+//
+
+{ .mfi
+(p0) setf.exp f60 = r50
+(p0) fma.d.s3 f102 = f34, f36, f101
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x40
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For Safe, no need to check for over/under.
+// For expm1, handle errors like exp.
+//
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.d.s2 f100 = f34, f36, f101
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x40
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p12, p0 = f102, 0x00F
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = f102, 0x00F
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fcmp.ge.unc.s1 p10, p0 = f100, f60
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Create largest double exponent + 1.
+// Create smallest double exponent - 1.
+//
+(p0) fcmp.ge.unc.s1 p8, p0 = f100, f60
+ nop.i 999 ;;
+}
+//
+// fcmp: resultS2 >= + overflow threshold -> set (a) if true
+// fcmp: resultS2 <= - overflow threshold -> set (b) if true
+// fclass: resultS3 is denorm/unorm/0 -> set (d) if true
+//
+
+{ .mib
+(p10) mov r65 = 41
+ nop.i 999
+(p10) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p8) mov r65 = 14
+ nop.i 999
+(p8) br.cond.sptk __libm_error_region ;;
+}
+//
+// Report that exp overflowed
+//
+
+{ .mib
+(p12) mov r65 = 42
+ nop.i 999
+(p12) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p11) mov r65 = 15
+ nop.i 999
+(p11) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Report that exp underflowed
+//
+(p0) br.cond.sptk EXP_64_RETURN;;
+}
+EXP_64_SPECIAL:
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = f8, 0x0c3
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p13, p8 = f8, 0x007
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p14, p0 = f8, 0x007
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p12, p9 = f8, 0x021
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = f8, 0x022
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p10, p0 = f8, 0x022
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Identify +/- 0, Inf, or -Inf
+// Generate the right kind of NaN.
+//
+(p13) fadd.d.s0 f99 = f0, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) mov f99 = f8
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fadd.d.s0 f99 = f8, f1
+//
+// exp(+/-0) = 1
+// expm1(+/-0) = +/-0
+// No exceptions raised
+//
+(p6) br.cond.sptk EXP_64_RETURN;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p14) br.cond.sptk EXP_64_RETURN;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) mov f99 = f0
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p10) fsub.d.s1 f99 = f0, f1
+//
+// exp(-Inf) = 0
+// expm1(-Inf) = -1
+// No exceptions raised.
+//
+(p10) br.cond.sptk EXP_64_RETURN;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) fmpy.d.s1 f99 = f8, f1
+//
+// exp(+Inf) = Inf
+// No exceptions raised.
+//
+(p0) br.cond.sptk EXP_64_RETURN;;
+}
+
+
+EXP_64_UNSUPPORTED:
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.d.s0 f99 = f8, f0
+ nop.b 0;;
+}
+
+EXP_64_RETURN:
+{ .mfb
+ nop.m 999
+(p0) mov f8 = f99
+(p0) br.ret.sptk b0
+}
+.endp expm1
+ASM_SIZE_DIRECTIVE(expm1)
+
+.proc __libm_error_region
+__libm_error_region: // Error path: spill FR_Y, FR_X and FR_RESULT into a new 64-byte frame, call __libm_error_support, return its result in f8; callers load the error code into r65 before branching here
+.prologue
+// (1) Prologue: take the Parameter 2 address from the incoming sp, save ar.pfs and gp, open the frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address = incoming sp - 32 (new sp + 32 once the frame is open)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+// (2) Store Parameter 2, form the Parameter 1 address from the new sp, save b0
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 at new sp + 32; post-increment leaves GR_Parameter_Y = new sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address = new sp + 16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0 (br.call below overwrites it)
+};;
+
+.body
+// (3) Store Parameter 1 and the computed result, then call the error handler
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack at new sp + 16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address = copy of GR_Parameter_Y (new sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack at new sp + 48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point GR_Parameter_Y back at Parameter 2 (new sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address from sp instead of reusing the pre-call register value
+};;
+
+// (4) Epilogue: reload the result into f8, release the frame, restore b0, gp and ar.pfs
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack (sp + 48)
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_expm1f.S b/sysdeps/ia64/fpu/s_expm1f.S
new file mode 100644
index 0000000..b317bae
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_expm1f.S
@@ -0,0 +1,1742 @@
+.file "exp_m1f.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// HISTORY
+// 2/02/00 Initial Version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: Combined expf(x) and expm1f(x), where
+// x
+// expf(x) = e , for single precision x values
+// x
+// expm1f(x) = e - 1 for single precision x values
+//
+// *********************************************************************
+//
+// Accuracy: Within .7 ulps for 80-bit floating point values
+// Very accurate for single precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9,f32-f61, f99-f102
+//
+// General Purpose Registers:
+// r32-r61
+// r62-r65 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions raised when appropriate for exp and expm1
+// Underflow exceptions raised when appropriate for exp and expm1
+// (Error Handling Routine called for overflow and Underflow)
+// Inexact raised when appropriate by algorithm
+//
+// expf(inf) = inf
+// expf(-inf) = +0
+// expf(SNaN) = QNaN
+// expf(QNaN) = QNaN
+// expf(0) = 1
+// expf(EM_special Values) = QNaN
+// expm1f(inf) = inf
+// expm1f(-inf) = -1
+// expm1f(SNaN) = QNaN
+// expm1f(QNaN) = QNaN
+// expm1f(0) = 0
+// expm1f(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Implementation and Algorithm Notes:
+//
+// ker_exp_64( in_FR : X,
+// in_GR : Flag,
+// in_GR : Expo_Range
+// out_FR : Y_hi,
+// out_FR : Y_lo,
+// out_FR : scale,
+// out_PR : Safe )
+//
+// On input, X is in register format and
+// Flag = 0 for exp,
+// Flag = 1 for expm1,
+//
+// On output, provided X and X_cor are real numbers, then
+//
+// scale*(Y_hi + Y_lo) approximates expf(X) if Flag is 0
+// scale*(Y_hi + Y_lo) approximates expf(X)-1 if Flag is 1
+//
+// The accuracy is sufficient for a highly accurate 64 sig.
+// bit implementation. Safe is set if there is no danger of
+// overflow/underflow when the result is composed from scale,
+// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
+// Otherwise, one must prepare to handle the possible exception
+// appropriately. Note that SAFE not set (false) does not mean
+// that overflow/underflow will occur; only the setting of SAFE
+// guarantees the opposite.
+//
+// **** High Level Overview ****
+//
+// The method consists of three cases.
+//
+// If |X| < Tiny use case exp_tiny;
+// else if |X| < 2^(-6) use case exp_small;
+// else use case exp_regular;
+//
+// Case exp_tiny:
+//
+// 1 + X can be used to approximate expf(X) or expf(X+X_cor);
+// X + X^2/2 can be used to approximate expf(X) - 1
+//
+// Case exp_small:
+//
+// Here, expf(X), expf(X+X_cor), and expf(X) - 1 can all be
+// approximated by a relatively simple polynomial.
+//
+// This polynomial resembles the truncated Taylor series
+//
+// expf(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+//
+// Case exp_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute expf(X), we accurately decompose X into
+//
+// X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13.
+//
+// Hence
+//
+// expf(X) = 2^( N / 2^12 ) * expf(r).
+//
+// The value 2^( N / 2^12 ) is obtained by simple combinations
+// of values calculated beforehand and stored in table; expf(r)
+// is approximated by a short polynomial because |r| is small.
+//
+// We elaborate this method in 4 steps.
+//
+// Step 1: Reduction
+//
+// The value 2^12/log(2) is stored as a double-extended number
+// L_Inv.
+//
+// N := round_to_nearest_integer( X * L_Inv )
+//
+// The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
+// that r can be computed accurately via
+//
+// r := (X - N*L_hi) - N*L_lo
+//
+// We pick L_hi such that N*L_hi is representable in 64 sig. bits
+// and thus the FMA X - N*L_hi is error free. So r is only
+// 1 rounding error away from an exact reduction with respect to
+//
+// L_hi + L_lo.
+//
+// In particular, L_hi has 30 significant bits and can be stored
+// as a double-precision number; L_lo has 64 significant bits and
+// stored as a double-extended number.
+//
+// In the case Flag = 2, we further modify r by
+//
+// r := r + X_cor.
+//
+// Step 2: Approximation
+//
+// expf(r) - 1 is approximated by a short polynomial of the form
+//
+// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
+//
+// Step 3: Composition from Table Values
+//
+// The value 2^( N / 2^12 ) can be composed from a couple of tables
+// of precalculated values. First, express N as three integers
+// K, M_1, and M_2 as
+//
+// N = K * 2^12 + M_1 * 2^6 + M_2
+//
+// Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
+// When N is represented in 2's complement, M_2 is simply the 6
+// lsb's, M_1 is the next 6, and K is simply N shifted right
+// arithmetically (sign extended) by 12 bits.
+//
+// Now, 2^( N / 2^12 ) is simply
+//
+// 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
+//
+// Clearly, 2^K needs no tabulation. The other two values are less
+// trivial because if we store each accurately to more than working
+// precision, then their product is too expensive to calculate. We
+// use the following method.
+//
+// Define two mathematical values, delta_1 and delta_2, implicitly
+// such that
+//
+// T_1 = expf( [M_1 log(2)/2^6] - delta_1 )
+// T_2 = expf( [M_2 log(2)/2^12] - delta_2 )
+//
+// are representable as 24 significant bits. To illustrate the idea,
+// we show how we define delta_1:
+//
+// T_1 := round_to_24_bits( expf( M_1 log(2)/2^6 ) )
+// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
+//
+// The last equality means mathematical equality. We then tabulate
+//
+// W_1 := expf(delta_1) - 1
+// W_2 := expf(delta_2) - 1
+//
+// Both in double precision.
+//
+// From the tabulated values T_1, T_2, W_1, W_2, we compose the values
+// T and W via
+//
+// T := T_1 * T_2 ...exactly
+// W := W_1 + (1 + W_1)*W_2
+//
+// W approximates expf( delta ) - 1 where delta = delta_1 + delta_2.
+// The mathematical product of T and (W+1) is an accurate representation
+// of 2^(M_1/2^6) * 2^(M_2/2^12).
+//
+// Step 4. Reconstruction
+//
+// Finally, we can reconstruct expf(X), expf(X) - 1.
+// Because
+//
+// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
+// + (M_2*log(2)/2^12 - delta_2)
+// + delta_1 + delta_2 + r ...accurately
+// We have
+//
+// expf(X) ~=~ 2^K * ( T + T*[expf(delta_1+delta_2+r) - 1] )
+// ~=~ 2^K * ( T + T*[expf(delta + r) - 1] )
+// ~=~ 2^K * ( T + T*[(expf(delta)-1)
+// + expf(delta)*(expf(r)-1)] )
+// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
+// ~=~ 2^K * ( Y_hi + Y_lo )
+//
+// where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r))
+//
+// For expf(X)-1, we have
+//
+// expf(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
+//
+// and we combine Y_hi + Y_lo - 2^(-K) into the form of two
+// numbers Y_hi + Y_lo carefully.
+//
+// **** Algorithm Details ****
+//
+// A careful algorithm must be used to realize the mathematical ideas
+// accurately. We describe each of the three cases. We assume SAFE
+// is preset to be TRUE.
+//
+// Case exp_tiny:
+//
+// The important points are to ensure an accurate result under
+// different rounding directions and a correct setting of the SAFE
+// flag.
+//
+// If Flag is 1, then
+// SAFE := False ...possibility of underflow
+// Scale := 1.0
+// Y_hi := X
+// Y_lo := 2^(-17000)
+// Else
+// Scale := 1.0
+// Y_hi := 1.0
+// Y_lo := X ...for different rounding modes
+// Endif
+//
+// Case exp_small:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into several portions.
+//
+// Let r = X
+//
+// If Flag is not 1 ...i.e. expf( argument )
+//
+// rsq := r * r;
+// r4 := rsq*rsq
+// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
+// poly_hi := r + rsq*(P_1 + r*P_2)
+// Y_lo := poly_hi + r4 * poly_lo
+// set lsb(Y_lo) to 1
+// Y_hi := 1.0
+// Scale := 1.0
+//
+// Else ...i.e. expf( argument ) - 1
+//
+// rsq := r * r
+// r4 := rsq * rsq
+// r6 := rsq * r4
+// poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
+// poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
+// Y_lo := rsq*poly_hi + poly_lo
+// set lsb(Y_lo) to 1
+// Y_hi := X
+// Scale := 1.0
+//
+// Endif
+//
+// Case exp_regular:
+//
+// The previous description contains enough information except the
+// computation of poly and the final Y_hi and Y_lo in the case for
+// expf(X)-1.
+//
+// The computation of poly for Step 2:
+//
+// rsq := r*r
+// poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
+//
+// For the case expf(X) - 1, we need to incorporate 2^(-K) into
+// Y_hi and Y_lo at the end of Step 4.
+//
+// If K > 10 then
+// Y_lo := Y_lo - 2^(-K)
+// Else
+// If K < -10 then
+// Y_lo := Y_hi + Y_lo
+// Y_hi := -2^(-K)
+// Else
+// Y_hi := Y_hi - 2^(-K)
+// End If
+// End If
+//
+
+#include "libm_support.h"
+
+
+GR_SAVE_B0 = r60 // local register; presumably holds b0 across the error-handler call (that code is later in the file)
+GR_SAVE_PFS = r59 // local register; presumably holds ar.pfs across the error-handler call
+GR_SAVE_GP = r61 // last of the 30 local registers (r32-r61) declared by the alloc in expm1f; presumably holds gp
+
+GR_Parameter_X = r62 // r62-r65 are the four output registers declared by the alloc in expm1f
+GR_Parameter_Y = r63 // they carry the arguments passed to the error handling routine
+GR_Parameter_RESULT = r64
+GR_Parameter_TAG = r65
+
+FR_X = f9 // f9 receives the normalized input (fnorm.s1 f9 = f8 at function entry)
+FR_Y = f1 // f1 is the register the code copies to obtain 1.0 (e.g. mov f36 = f1 for Scale)
+FR_RESULT = f99 // one of f99-f102 listed under Resources Used in the file header
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 64
+Constants_exp_64_Arg: // argument-reduction constants; each row is one 16-byte slot read with ldfe
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 // Inv_L = 2^12/log(2), multiplied by X to get float_N
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi: high part of log(2)/2^12 (low significand word is zero)
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo: low part of log(2)/2^12
+// /* Inv_L, L_hi, L_lo: read in this order by three ldfe loads, each post-incrementing the pointer by 16 */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64
+Constants_exp_64_Exponents: // rows of two 64-bit signed integers; the main path reads row Expo_Range (r32, 16-byte stride) as big_expo_pos, big_expo_neg
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF // row 0 (Expo_Range 0, single precision): 126, -125
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF // row 1: 1022, -1021 (presumably the double range - not selected in this file, confirm)
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF // row 2: 16382, -16381 (presumably the double-extended range - confirm)
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF // row 3: same values as row 2
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF // row 4: -30, -60 (NOTE(review): presumably small-argument exponent thresholds; the reader is outside this excerpt)
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF // row 5: -70, -70 (NOTE(review): use not visible in this excerpt)
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64
+Constants_exp_64_A: // coefficients of poly = r + rsq*(A_1 + r*(A_2 + r*A_3)) for the regular case, one 16-byte ldfe slot per row
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A_3 (about 1/24), loaded first
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A_2 (about 1/6)
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A_1 (about 1/2), loaded last
+// /* Reversed: stored highest-order coefficient first (A_3, A_2, A_1) */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64
+Constants_exp_64_P: // P_1..P_6 of the exp_small polynomial for expf(X) (Flag not 1), one 16-byte slot per row
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000 // P_6 (labels inferred from the Reversed note and the magnitudes - confirm)
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000 // P_5
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000 // P_4
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000 // P_3
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000 // P_2
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000 // P_1 (about 1/2)
+// /* Reversed: stored highest-order coefficient first */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64
+Constants_exp_64_Q: // Q_1..Q_7 of the exp_small polynomial for expf(X)-1 (Flag is 1), one 16-byte slot per row
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
+data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000 // Q_7 (labels inferred from the Reversed note and the magnitudes - confirm)
+data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000 // Q_6
+data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000 // Q_5
+data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000 // Q_4
+data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000 // Q_3
+data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000 // Q_2
+data4 0x00000000,0x80000000,0x00003FFE,0x00000000 // Q_1 (exactly 1/2)
+// /* Reversed: stored highest-order coefficient first */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
+
+.align 64
+Constants_exp_64_T1: // 64 single-precision values T_1[M_1] = round_to_24_bits(exp(M_1*log(2)/2^6)); indexed with a 4-byte stride and read by ldfs; entry 0 is 1.0
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64
+Constants_exp_64_T2: // 64 single-precision values T_2[M_2] = round_to_24_bits(exp(M_2*log(2)/2^12)); indexed with a 4-byte stride and read by ldfs; entry 0 is 1.0
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+
+.align 64
+Constants_exp_64_W1: // 64 double-precision values W_1[M_1] = exp(delta_1) - 1; each is two data4 words (low 32 bits first), indexed with an 8-byte stride and read by ldfd; entry 0 is 0.0
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64
+Constants_exp_64_W2: // 64 double-precision values W_2[M_2] = exp(delta_2) - 1; each is two data4 words (low 32 bits first), indexed with an 8-byte stride and read by ldfd; entry 0 is 0.0
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+
+.section .text
+.proc expm1f#
+.global expm1f#
+.align 64
+
+expm1f:
+#ifdef _LIBC
+.global __expm1f#
+__expm1f:
+#endif
+
+
+{ .mii
+ alloc r32 = ar.pfs,0,30,4,0
+(p0) add r33 = 1, r0
+(p0) cmp.eq.unc p7, p0 = r0, r0
+}
+;;
+
+//
+// Set p7 true for expm1
+// Set Flag = r33 = 1 for expm1
+// These are really no longer necessary, but are a remnant
+// from when this file had multiple entry points.
+// They should be carefully removed
+
+
+{ .mfi
+(p0) add r32 = 0,r0
+(p0) fnorm.s1 f9 = f8
+ nop.i 0
+}
+
+{ .mfi
+ nop.m 0
+//
+// Set p7 false for exp
+// Set Flag = r33 = 0 for exp
+//
+(p0) fclass.m.unc p6, p8 = f8, 0x1E7
+ nop.i 0 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p9, p0 = f8, 0x1FF
+ nop.i 0
+}
+
+{ .mfi
+ nop.m 999
+(p0) mov f36 = f1
+ nop.i 999 ;;
+}
+
+//
+// Identify NatVals, NaNs, Infs, and Zeros.
+// Identify EM unsupporteds.
+// Save special input registers
+//
+// Create FR_X_cor = 0.0
+// GR_Flag = 0
+// GR_Expo_Range = 0 (r32) for single precision
+// FR_Scale = 1.0
+//
+
+{ .mfb
+ nop.m 999
+(p0) mov f32 = f0
+(p6) br.cond.spnt EXPF_64_SPECIAL ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt EXPF_64_UNSUPPORTED ;;
+}
+
+//
+// Branch out for special input values
+//
+
+{ .mfi
+(p0) cmp.ne.unc p12, p13 = 0x01, r33
+(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0
+(p0) cmp.eq.unc p15, p0 = r0, r0
+}
+
+//
+// Raise possible denormal operand exception
+// Normalize x
+//
+// This function computes expf( x + x_cor)
+// Input FR 1: FR_X
+// Input FR 2: FR_X_cor
+// Input GR 1: GR_Flag
+// Input GR 2: GR_Expo_Range
+// Output FR 3: FR_Y_hi
+// Output FR 4: FR_Y_lo
+// Output FR 5: FR_Scale
+// Output PR 1: PR_Safe
+
+//
+// Prepare to load constants
+// Set Safe = True
+//
+
+{ .mmi
+(p0) addl r34 = @ltoff(Constants_exp_64_Arg#),gp
+(p0) addl r40 = @ltoff(Constants_exp_64_W1#),gp
+(p0) addl r41 = @ltoff(Constants_exp_64_W2#),gp
+};;
+
+{ .mmi
+ ld8 r34 = [r34]
+ ld8 r40 = [r40]
+(p0) addl r50 = @ltoff(Constants_exp_64_T1#), gp
+}
+;;
+{ .mmi
+ ld8 r41 = [r41]
+(p0) ldfe f37 = [r34],16
+(p0) addl r51 = @ltoff(Constants_exp_64_T2#), gp
+}
+;;
+//
+// N = fcvt.fx(float_N)
+// Set p14 if -6 > expo_X
+//
+//
+// Bias = 0x0FFFF
+// expo_X = expo_X and Mask
+//
+
+{ .mmi
+ ld8 r50 = [r50]
+(p0) ldfe f40 = [r34],16
+ nop.i 999
+}
+;;
+
+{ .mlx
+ nop.m 999
+(p0) movl r58 = 0x0FFFF
+};;
+
+//
+// Load W2_ptr
+// Branch to SMALL if expo_X < -6
+//
+//
+// float_N = X * L_Inv
+// expo_X = exponent of X
+// Mask = 0x1FFFF
+//
+
+{ .mmi
+ ld8 r51 = [r51]
+(p0) ldfe f41 = [r34],16
+//
+// float_N = X * L_Inv
+// expo_X = exponent of X
+// Mask = 0x1FFFF
+//
+ nop.i 0
+};;
+
+{ .mlx
+(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
+(p0) movl r39 = 0x1FFFF
+}
+;;
+
+{ .mmi
+ ld8 r34 = [r34]
+(p0) getf.exp r37 = f9
+ nop.i 999
+}
+;;
+
+{ .mii
+ nop.m 999
+ nop.i 999
+(p0) and r37 = r37, r39 ;;
+}
+
+{ .mmi
+(p0) sub r37 = r37, r58 ;;
+(p0) cmp.gt.unc p14, p0 = -6, r37
+(p0) cmp.lt.unc p10, p0 = 14, r37 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load L_inv
+// Set p12 true for Flag = 0 (exp)
+// Set p13 true for Flag = 1 (expm1)
+//
+(p0) fmpy.s1 f38 = f9, f37
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Load L_hi
+// expo_X = expo_X - Bias
+// get W1_ptr
+//
+(p0) fcvt.fx.s1 f39 = f38
+(p14) br.cond.spnt EXPF_SMALL ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt EXPF_HUGE ;;
+}
+
+{ .mmi
+(p0) shladd r34 = r32,4,r34
+(p0) addl r35 = @ltoff(Constants_exp_64_A#),gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ld8 r35 = [r35]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+//
+// Load T_1,T_2
+//
+
+{ .mmb
+(p0) ldfe f51 = [r35],16
+(p0) ld8 r45 = [r34],8
+ nop.b 999 ;;
+}
+//
+// Set Safe = True if k >= big_expo_neg
+// Set Safe = False if k < big_expo_neg
+//
+
+{ .mmb
+(p0) ldfe f49 = [r35],16
+(p0) ld8 r48 = [r34],0
+ nop.b 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Branch to HUGE if expo_X > 14
+//
+(p0) fcvt.xf f38 = f39
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) getf.sig r52 = f39
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) extr.u r43 = r52, 6, 6 ;;
+//
+// r = r - float_N * L_lo
+// K = extr(N_fix,12,52)
+//
+(p0) shladd r40 = r43,3,r40 ;;
+}
+
+{ .mfi
+(p0) shladd r50 = r43,2,r50
+(p0) fnma.s1 f42 = f40, f38, f9
+//
+// float_N = float(N)
+// N_fix = significand of N
+//
+(p0) extr.u r42 = r52, 0, 6
+}
+
+{ .mmi
+(p0) ldfd f43 = [r40],0 ;;
+(p0) shladd r41 = r42,3,r41
+(p0) shladd r51 = r42,2,r51
+}
+//
+// W_1_p1 = 1 + W_1
+//
+
+{ .mmi
+(p0) ldfs f44 = [r50],0 ;;
+(p0) ldfd f45 = [r41],0
+//
+// M_2 = extr(N_fix,0,6)
+// M_1 = extr(N_fix,6,6)
+// r = X - float_N * L_hi
+//
+(p0) extr r44 = r52, 12, 52
+}
+
+{ .mmi
+(p0) ldfs f46 = [r51],0 ;;
+(p0) sub r46 = r58, r44
+(p0) cmp.gt.unc p8, p15 = r44, r45
+}
+//
+// W = W_1 + W_1_p1*W_2
+// Load A_2
+// Bias_m_K = Bias - K
+//
+
+{ .mii
+(p0) ldfe f40 = [r35],16
+//
+// load A_1
+// poly = A_2 + r*A_3
+// rsq = r * r
+// neg_2_mK = exponent of Bias_m_k
+//
+(p0) add r47 = r58, r44 ;;
+//
+// Set Safe = True if k <= big_expo_pos
+// Set Safe = False if k > big_expo_pos
+// Load A_3
+//
+(p15) cmp.lt p8,p15 = r44,r48 ;;
+}
+
+{ .mmf
+(p0) setf.exp f61 = r46
+//
+// Bias_p + K = Bias + K
+// T = T_1 * T_2
+//
+(p0) setf.exp f36 = r47
+(p0) fnma.s1 f42 = f41, f38, f42 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load W_1,W_2
+// Load big_exp_pos, load big_exp_neg
+//
+(p0) fadd.s1 f47 = f43, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f52 = f42, f51, f49
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 f48 = f42, f42
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 f53 = f44, f46
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f54 = f45, f47, f43
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fneg f61 = f61
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f52 = f42, f52, f40
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 f55 = f54, f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// W + Wp1 * poly
+//
+(p0) mov f34 = f53
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// A_1 + r * poly
+// Scale = setf_expf(Bias_p_k)
+//
+(p0) fma.s1 f52 = f48, f52, f42
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly = r + rsq(A_1 + r*poly)
+// Wp1 = 1 + W
+// neg_2_mK = -neg_2_mK
+//
+(p0) fma.s1 f35 = f55, f52, f54
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.s1 f35 = f35, f53
+//
+// Y_hi = T
+// Y_lo = T * (W + Wp1*poly)
+//
+(p12) br.cond.sptk EXPF_MAIN ;;
+}
+//
+// Branch if expf(x)
+// Continue for expf(x-1)
+//
+
+{ .mii
+(p0) cmp.lt.unc p12, p13 = 10, r44
+ nop.i 999 ;;
+//
+// Set p12 if 10 < K, Else p13
+//
+(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
+}
+//
+// K > 10: Y_lo = Y_lo + neg_2_mK
+// K <=10: Set p13 if -10 > K, Else set p14
+//
+
+{ .mfi
+(p13) cmp.eq p15, p0 = r0, r0
+(p14) fadd.s1 f34 = f61, f34
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fadd.s1 f35 = f35, f61
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 f35 = f35, f34
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// K <= 10 and K < -10, Set Safe = True
+// K <= 10 and K < 10, Y_lo = Y_hi + Y_lo
+// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
+//
+(p13) mov f34 = f61
+(p0) br.cond.sptk EXPF_MAIN ;;
+}
+EXPF_SMALL:
+{ .mmi
+(p12) addl r35 = @ltoff(Constants_exp_64_P#), gp
+(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p12) ld8 r35 = [r35]
+ ld8 r34 = [r34]
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p13) addl r35 = @ltoff(Constants_exp_64_Q#), gp
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Return
+// K <= 10 and K < 10, Y_hi = neg_2_mk
+//
+// /*******************************************************/
+// /*********** Branch EXP_SMALL *************************/
+// /*******************************************************/
+
+{ .mfi
+(p13) ld8 r35 = [r35]
+(p0) mov f42 = f9
+(p0) add r34 = 0x48,r34
+}
+;;
+
+//
+// Flag = 0
+// r4 = rsq * rsq
+//
+
+{ .mfi
+(p0) ld8 r49 =[r34],0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Flag = 1
+//
+(p0) cmp.lt.unc p14, p0 = r37, r49 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// r = X
+//
+(p0) fmpy.s1 f48 = f42, f42
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// rsq = r * r
+//
+(p0) fmpy.s1 f50 = f48, f48
+//
+// Is input very small?
+//
+(p14) br.cond.spnt EXPF_VERY_SMALL ;;
+}
+//
+// Flag_not1: Y_hi = 1.0
+// Flag is 1: r6 = rsq * r4
+//
+
+{ .mfi
+(p12) ldfe f52 = [r35],16
+(p12) mov f34 = f1
+(p0) add r53 = 0x1,r0 ;;
+}
+
+{ .mfi
+(p13) ldfe f51 = [r35],16
+//
+// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
+//
+(p13) mov f34 = f9
+ nop.i 999 ;;
+}
+
+{ .mmf
+(p12) ldfe f53 = [r35],16
+//
+// For Flag_not_1, Y_hi = X
+// Scale = 1
+// Create 0x000...01
+//
+(p0) setf.sig f37 = r53
+(p0) mov f36 = f1 ;;
+}
+
+{ .mmi
+(p13) ldfe f52 = [r35],16 ;;
+(p12) ldfe f54 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p13) ldfe f53 = [r35],16
+(p13) fmpy.s1 f58 = f48, f50
+ nop.i 999 ;;
+}
+//
+// Flag_not1: poly_lo = P_5 + r*P_6
+// Flag_1: poly_lo = Q_6 + r*Q_7
+//
+
+{ .mmi
+(p13) ldfe f54 = [r35],16 ;;
+(p12) ldfe f55 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f56 = [r35],16 ;;
+(p13) ldfe f55 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f57 = [r35],0 ;;
+(p13) ldfe f56 = [r35],16
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p13) ldfe f57 = [r35],0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For Flag_not_1, load p5,p6,p1,p2
+// Else load p5,p6,p1,p2
+//
+(p12) fma.s1 f60 = f52, f42, f53
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f60 = f51, f42, f52
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f60 = f60, f42, f54
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f59 = f56, f42, f57
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f60 = f42, f60, f53
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f59 = f59, f48, f42
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7)
+// Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
+// Flag_not1: poly_hi = (P_1 + r*P_2)
+//
+(p13) fmpy.s1 f60 = f60, f58
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f60 = f60, f42, f55
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_1: poly_lo = r6 *(Q_5 + ....)
+// Flag_not1: poly_hi = r + rsq *(P_1 + r*P_2)
+//
+(p12) fma.s1 f35 = f60, f50, f59
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f59 = f54, f42, f55
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_not1: Y_lo = rsq* poly_hi + poly_lo
+// Flag_1: poly_lo = rsq* poly_hi + poly_lo
+//
+(p13) fma.s1 f59 = f59, f42, f56
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_not_1: (P_1 + r*P_2)
+//
+(p13) fma.s1 f59 = f59, f42, f57
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2)
+//
+(p13) fma.s1 f35 = f59, f48, f60
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Create 0.000...01
+//
+(p0) for f37 = f35, f37
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Set lsb of Y_lo to 1
+//
+(p0) fmerge.se f35 = f35,f37
+(p0) br.cond.sptk EXPF_MAIN ;;
+}
+EXPF_VERY_SMALL:
+
+{ .mmi
+ nop.m 999
+(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
+ nop.i 999;;
+}
+
+{ .mfi
+(p13) ld8 r34 = [r34];
+(p12) mov f35 = f9
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) mov f34 = f1
+(p12) br.cond.sptk EXPF_MAIN ;;
+}
+
+{ .mlx
+(p13) add r34 = 8,r34
+(p13) movl r39 = 0x0FFFE ;;
+}
+//
+// Load big_exp_neg
+// Create 1/2's exponent
+//
+
+{ .mii
+(p13) setf.exp f56 = r39
+(p13) shladd r34 = r32,4,r34 ;;
+ nop.i 999
+}
+//
+// Negative exponents are stored after positive
+//
+
+{ .mfi
+(p13) ld8 r45 = [r34],0
+//
+// Y_hi = x
+// Scale = 1
+//
+(p13) fmpy.s1 f35 = f9, f9
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Reset Safe if necessary
+// Create 1/2
+//
+(p13) mov f34 = f9
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p13) cmp.lt.unc p0, p15 = r37, r45
+(p13) mov f36 = f1
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Y_lo = x * x
+//
+(p13) fmpy.s1 f35 = f35, f56
+//
+// Y_lo = x*x/2
+//
+(p13) br.cond.sptk EXPF_MAIN ;;
+}
+EXPF_HUGE:
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.gt.unc.s1 p14, p0 = f9, f0
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl r39 = 0x15DC0 ;;
+}
+
+{ .mfi
+(p14) setf.exp f34 = r39
+(p14) mov f35 = f1
+(p14) cmp.eq p0, p15 = r0, r0 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p14) mov f36 = f34
+//
+// If x > 0, Set Safe = False
+// If x > 0, Y_hi = 2**(24,000)
+// If x > 0, Y_lo = 1.0
+// If x > 0, Scale = 2**(24,000)
+//
+(p14) br.cond.sptk EXPF_MAIN ;;
+}
+
+{ .mlx
+ nop.m 999
+(p12) movl r39 = 0xA240
+}
+
+{ .mlx
+ nop.m 999
+(p12) movl r38 = 0xA1DC ;;
+}
+
+{ .mmb
+(p13) cmp.eq p15, p14 = r0, r0
+(p12) setf.exp f34 = r39
+ nop.b 999 ;;
+}
+
+{ .mlx
+(p12) setf.exp f35 = r38
+(p13) movl r39 = 0xFF9C
+}
+
+{ .mfi
+ nop.m 999
+(p13) fsub.s1 f34 = f0, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) mov f36 = f34
+(p12) cmp.eq p0, p15 = r0, r0 ;;
+}
+
+{ .mfi
+(p13) setf.exp f35 = r39
+(p13) mov f36 = f1
+ nop.i 999 ;;
+}
+EXPF_MAIN:
+
+{ .mfi
+(p0) cmp.ne.unc p12, p0 = 0x01, r33
+(p0) fmpy.s1 f101 = f36, f35
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fma.s.s0 f99 = f34, f36, f101
+(p15) br.cond.sptk EXPF_64_RETURN ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x01
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl r50 = 0x0000000001007F ;;
+}
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + RZ + TD (Underflows)
+//
+//
+// If (Safe) is true, then
+// Compute result using user supplied status field.
+// No overflow or underflow here, but perhaps inexact.
+// Return
+// Else
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE single, double,
+// double extended
+//
+
+{ .mfi
+(p0) setf.exp f60 = r50
+(p0) fma.s.s3 f102 = f34, f36, f101
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x40
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// For Safe, no need to check for over/under.
+// For expm1, handle errors like exp.
+//
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s.s2 f100 = f34, f36, f101
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x40
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p12, p0 = f102, 0x00F
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = f102, 0x00F
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fcmp.ge.unc.s1 p10, p0 = f100, f60
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// Create largest double exponent + 1.
+// Create smallest double exponent - 1.
+//
+(p0) fcmp.ge.unc.s1 p8, p0 = f100, f60
+ nop.i 999 ;;
+}
+//
+// fcmp: resultS2 >= + overflow threshold -> set (a) if true
+// fcmp: resultS2 <= - overflow threshold -> set (b) if true
+// fclass: resultS3 is denorm/unorm/0 -> set (d) if true
+//
+
+{ .mib
+(p10) mov GR_Parameter_TAG = 43
+ nop.i 999
+(p10) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p8) mov GR_Parameter_TAG = 16
+ nop.i 999
+(p8) br.cond.sptk __libm_error_region ;;
+}
+//
+// Report that exp overflowed
+//
+
+{ .mib
+(p12) mov GR_Parameter_TAG = 44
+ nop.i 999
+(p12) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p11) mov GR_Parameter_TAG = 17
+ nop.i 999
+(p11) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Report that exp underflowed
+//
+(p0) br.cond.sptk EXPF_64_RETURN ;;
+}
+EXPF_64_SPECIAL:
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = f8, 0x0c3
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p13, p8 = f8, 0x007
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p14, p0 = f8, 0x007
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p12, p9 = f8, 0x021
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = f8, 0x022
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p10, p0 = f8, 0x022
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Identify +/- 0, Inf, or -Inf
+// Generate the right kind of NaN.
+//
+(p13) fadd.s.s0 f99 = f0, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) mov f99 = f8
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fadd.s.s0 f99 = f8, f1
+//
+// expf(+/-0) = 1
+// expm1f(+/-0) = +/-0
+// No exceptions raised
+//
+(p6) br.cond.sptk EXPF_64_RETURN ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p14) br.cond.sptk EXPF_64_RETURN ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) mov f99 = f0
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p10) fsub.s.s1 f99 = f0, f1
+//
+// expf(-Inf) = 0
+// expm1f(-Inf) = -1
+// No exceptions raised.
+//
+(p10) br.cond.sptk EXPF_64_RETURN ;;
+}
+
+{ .mfb
+ nop.m 999
+(p12) fmpy.s.s1 f99 = f8, f1
+//
+// expf(+Inf) = Inf
+// No exceptions raised.
+//
+(p0) br.cond.sptk EXPF_64_RETURN ;;
+}
+EXPF_64_UNSUPPORTED:
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.s.s0 f99 = f8, f0
+ nop.b 0;;
+}
+
+EXPF_64_RETURN:
+{ .mfb
+ nop.m 999
+(p0) mov f8 = f99
+(p0) br.ret.sptk b0
+}
+.endp expm1f
+ASM_SIZE_DIRECTIVE(expm1f)
+
+
+.proc __libm_error_region
+__libm_error_region: // Error stub: spill FR_Y, FR_X, FR_RESULT as singles into a 64-byte frame, call __libm_error_support, return the stored result in f8
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, which is new sp+32 once the frame below is created
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32; the post-increment leaves the pointer at sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Move the Parameter 2 pointer back to sp+32 for the call
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_expm1l.S b/sysdeps/ia64/fpu/s_expm1l.S
new file mode 100644
index 0000000..a31910a
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_expm1l.S
@@ -0,0 +1,1603 @@
+.file "exp_m1l.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: Combined expl(x) and expm1l(x), where
+// x
+// expl(x) = e , for double-extended precision x values
+// x
+// expm1l(x) = e - 1 for double-extended precision x values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9,f32-f61, f99-f102
+//
+// General Purpose Registers:
+// r32-r61
+// r62-r65 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions raised when appropriate for exp and expm1
+// Underflow exceptions raised when appropriate for exp and expm1
+// (Error Handling Routine called for overflow and Underflow)
+// Inexact raised when appropriate by algorithm
+//
+// expl(inf) = inf
+// expl(-inf) = +0
+// expl(SNaN) = QNaN
+// expl(QNaN) = QNaN
+// expl(0) = 1
+// expl(EM_special Values) = QNaN
+// expm1l(inf) = inf
+// expm1l(-inf) = -1
+// expm1l(SNaN) = QNaN
+// expm1l(QNaN) = QNaN
+// expm1l(0) = 0
+// expm1l(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Implementation and Algorithm Notes:
+//
+// ker_exp_64( in_FR : X,
+// in_GR : Flag,
+// in_GR : Expo_Range
+// out_FR : Y_hi,
+// out_FR : Y_lo,
+// out_FR : scale,
+// out_PR : Safe )
+//
+// On input, X is in register format and
+// Flag = 0 for exp,
+// Flag = 1 for expm1,
+//
+// On output, provided X and X_cor are real numbers, then
+//
+// scale*(Y_hi + Y_lo) approximates expl(X) if Flag is 0
+// scale*(Y_hi + Y_lo) approximates expl(X)-1 if Flag is 1
+//
+// The accuracy is sufficient for a highly accurate 64 sig.
+// bit implementation. Safe is set if there is no danger of
+// overflow/underflow when the result is composed from scale,
+// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set.
+// Otherwise, one must prepare to handle the possible exception
+// appropriately. Note that SAFE not set (false) does not mean
+// that overflow/underflow will occur; only the setting of SAFE
+// guarantees the opposite.
+//
+// **** High Level Overview ****
+//
+// The method consists of three cases.
+//
+// If |X| < Tiny use case exp_tiny;
+// else if |X| < 2^(-6) use case exp_small;
+// else use case exp_regular;
+//
+// Case exp_tiny:
+//
+// 1 + X can be used to approximate expl(X) or expl(X+X_cor);
+// X + X^2/2 can be used to approximate expl(X) - 1
+//
+// Case exp_small:
+//
+// Here, expl(X), expl(X+X_cor), and expl(X) - 1 can all be
+// approximated by a relatively simple polynomial.
+//
+// This polynomial resembles the truncated Taylor series
+//
+// expl(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+//
+// Case exp_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute expl(X), we accurately decompose X into
+//
+// X = N * log(2)/(2^12) + r, |r| <= log(2)/2^13.
+//
+// Hence
+//
+// expl(X) = 2^( N / 2^12 ) * expl(r).
+//
+// The value 2^( N / 2^12 ) is obtained by simple combinations
+// of values calculated beforehand and stored in table; expl(r)
+// is approximated by a short polynomial because |r| is small.
+//
+// We elaborate this method in 4 steps.
+//
+// Step 1: Reduction
+//
+// The value 2^12/log(2) is stored as a double-extended number
+// L_Inv.
+//
+// N := round_to_nearest_integer( X * L_Inv )
+//
+// The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
+// that r can be computed accurately via
+//
+// r := (X - N*L_hi) - N*L_lo
+//
+// We pick L_hi such that N*L_hi is representable in 64 sig. bits
+// and thus the FMA X - N*L_hi is error free. So r is only
+// one rounding error away from an exact reduction with respect to
+//
+// L_hi + L_lo.
+//
+// In particular, L_hi has 30 significant bits and can be stored
+// as a double-precision number; L_lo has 64 significant bits and
+// is stored as a double-extended number.
+//
+// In the case Flag = 2, we further modify r by
+//
+// r := r + X_cor.
+//
+// Step 2: Approximation
+//
+// expl(r) - 1 is approximated by a short polynomial of the form
+//
+// r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
+//
+// Step 3: Composition from Table Values
+//
+// The value 2^( N / 2^12 ) can be composed from a couple of tables
+// of precalculated values. First, express N as three integers
+// K, M_1, and M_2 as
+//
+// N = K * 2^12 + M_1 * 2^6 + M_2
+//
+// Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
+// When N is represented in 2's complement, M_2 is simply the 6
+// lsb's, M_1 is the next 6, and K is simply N shifted right
+// arithmetically (sign extended) by 12 bits.
+//
+// Now, 2^( N / 2^12 ) is simply
+//
+// 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
+//
+// Clearly, 2^K needs no tabulation. The other two values are less
+// trivial because if we store each accurately to more than working
+// precision, then their product is too expensive to calculate. We
+// use the following method.
+//
+// Define two mathematical values, delta_1 and delta_2, implicitly
+// such that
+//
+// T_1 = expl( [M_1 log(2)/2^6] - delta_1 )
+// T_2 = expl( [M_2 log(2)/2^12] - delta_2 )
+//
+// are representable as 24 significant bits. To illustrate the idea,
+// we show how we define delta_1:
+//
+// T_1 := round_to_24_bits( expl( M_1 log(2)/2^6 ) )
+// delta_1 = (M_1 log(2)/2^6) - log( T_1 )
+//
+// The last equality means mathematical equality. We then tabulate
+//
+// W_1 := expl(delta_1) - 1
+// W_2 := expl(delta_2) - 1
+//
+// Both in double precision.
+//
+// From the tabulated values T_1, T_2, W_1, W_2, we compose the values
+// T and W via
+//
+// T := T_1 * T_2 ...exactly
+// W := W_1 + (1 + W_1)*W_2
+//
+// W approximates expl( delta ) - 1 where delta = delta_1 + delta_2.
+// The mathematical product of T and (W+1) is an accurate representation
+// of 2^(M_1/2^6) * 2^(M_2/2^12).
+//
+// Step 4. Reconstruction
+//
+// Finally, we can reconstruct expl(X), expl(X) - 1.
+// Because
+//
+// X = K * log(2) + (M_1*log(2)/2^6 - delta_1)
+// + (M_2*log(2)/2^12 - delta_2)
+// + delta_1 + delta_2 + r ...accurately
+// We have
+//
+// expl(X) ~=~ 2^K * ( T + T*[expl(delta_1+delta_2+r) - 1] )
+// ~=~ 2^K * ( T + T*[expl(delta + r) - 1] )
+// ~=~ 2^K * ( T + T*[(expl(delta)-1)
+// + expl(delta)*(expl(r)-1)] )
+// ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
+// ~=~ 2^K * ( Y_hi + Y_lo )
+//
+// where Y_hi = T and Y_lo = T*(W + (1+W)*poly(r))
+//
+// For expl(X)-1, we have
+//
+// expl(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+// ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
+//
+// and we combine Y_hi + Y_lo - 2^(-K) into the form of two
+// numbers Y_hi + Y_lo carefully.
+//
+// **** Algorithm Details ****
+//
+// A careful algorithm must be used to realize the mathematical ideas
+// accurately. We describe each of the three cases. We assume SAFE
+// is preset to be TRUE.
+//
+// Case exp_tiny:
+//
+// The important points are to ensure an accurate result under
+// different rounding directions and a correct setting of the SAFE
+// flag.
+//
+// If Flag is 1, then
+// SAFE := False ...possibility of underflow
+// Scale := 1.0
+// Y_hi := X
+// Y_lo := 2^(-17000)
+// Else
+// Scale := 1.0
+// Y_hi := 1.0
+// Y_lo := X ...for different rounding modes
+// Endif
+//
+// Case exp_small:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into several portions.
+//
+// Let r = X
+//
+// If Flag is not 1 ...i.e. expl( argument )
+//
+// rsq := r * r;
+// r4 := rsq*rsq
+// poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
+// poly_hi := r + rsq*(P_1 + r*P_2)
+// Y_lo := poly_hi + r4 * poly_lo
+// set lsb(Y_lo) to 1
+// Y_hi := 1.0
+// Scale := 1.0
+//
+// Else ...i.e. expl( argument ) - 1
+//
+// rsq := r * r
+// r4 := rsq * rsq
+// r6 := rsq * r4
+// poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
+// poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
+// Y_lo := rsq*poly_hi + poly_lo
+// set lsb(Y_lo) to 1
+// Y_hi := X
+// Scale := 1.0
+//
+// Endif
+//
+// Case exp_regular:
+//
+// The previous description contains enough information except the
+// computation of poly and the final Y_hi and Y_lo in the case for
+// expl(X)-1.
+//
+// The computation of poly for Step 2:
+//
+// rsq := r*r
+// poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
+//
+// For the case expl(X) - 1, we need to incorporate 2^(-K) into
+// Y_hi and Y_lo at the end of Step 4.
+//
+// If K > 10 then
+// Y_lo := Y_lo - 2^(-K)
+// Else
+// If K < -10 then
+// Y_lo := Y_hi + Y_lo
+// Y_hi := -2^(-K)
+// Else
+// Y_hi := Y_hi - 2^(-K)
+// End If
+// End If
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 64
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 // L_Inv = 2^12/log(2)
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi: leading bits of log(2)/2^12 (low significand word is zero)
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo: remainder, so that L_hi + L_lo = log(2)/2^12
+// Order: Inv_L, L_hi, L_lo -- each entry is a 16-byte slot read with ldfe and a post-increment of 16
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64
+Constants_exp_64_Exponents: // 8-byte signed integers, low word first; NOTE(review): rows 1-4 look like (big_expo_pos, big_expo_neg) pairs selected by GR_Expo_Range -- confirm against the loads
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF // 126, -125
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF // 1022, -1021
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF // 16382, -16381
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF // 16382, -16381 (same values as the previous row)
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF // -30, -60; NOTE(review): presumably exponent thresholds for the tiny-argument test -- confirm
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF // -70, -70; NOTE(review): purpose not visible from this table alone
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64
+Constants_exp_64_A: // Coefficients of poly(r) = r + A_1 r^2 + A_2 r^3 + A_3 r^4, double-extended in 16-byte slots
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000 // A_3, close to 1/24
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000 // A_2, close to 1/6
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A_1, close to 1/2
+// Stored in reversed order: highest-degree coefficient (A_3) first, A_1 last
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64
+Constants_exp_64_P: // P_1..P_6 for the exp_small polynomial (Flag not 1), double-extended in 16-byte slots
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000 // P_6, close to 1/5040
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000 // P_5, close to 1/720
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000 // P_4, close to 1/120
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000 // P_3, close to 1/24
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000 // P_2, close to 1/6
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000 // P_1, close to 1/2
+// Stored in reversed order: P_6 first, P_1 last
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64
+Constants_exp_64_Q: // Q_1..Q_7 for the exp_small polynomial of expl(X) - 1 (Flag is 1), double-extended in 16-byte slots
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
+data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000 // Q_7, close to 1/40320
+data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000 // Q_6, close to 1/5040
+data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000 // Q_5, close to 1/720
+data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000 // Q_4, close to 1/120
+data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000 // Q_3, close to 1/24
+data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000 // Q_2, close to 1/6
+data4 0x00000000,0x80000000,0x00003FFE,0x00000000 // Q_1 = 1/2 exactly
+// Stored in reversed order: Q_7 first, Q_1 last
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
+
+.align 64
+Constants_exp_64_T1: // T_1[M_1] = 2^(M_1/2^6) rounded to 24 bits, M_1 = 0..63; 64 single-precision bit patterns, 4 bytes each
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64
+Constants_exp_64_T2: // T_2[M_2] = 2^(M_2/2^12) rounded to 24 bits, M_2 = 0..63; 64 single-precision bit patterns, 4 bytes each
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+
+.align 64
+Constants_exp_64_W1: // W_1[M_1] = expl(delta_1) - 1 for M_1 = 0..63; 64 double-precision values, each written as (low word, high word), two per line
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64
+Constants_exp_64_W2: // W_2[M_2] = expl(delta_2) - 1 for M_2 = 0..63; 64 double-precision values, each written as (low word, high word), two per line
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+
+GR_SAVE_PFS = r59 // r59-r61 are the last three of the 30 locals allocated at entry; NOTE(review): presumably holds ar.pfs across the error call -- confirm
+GR_SAVE_B0 = r60 // NOTE(review): presumably holds b0 across the error call -- confirm
+GR_SAVE_GP = r61 // NOTE(review): presumably holds gp across the error call -- confirm
+GR_Parameter_X = r62 // r62-r65 are the four output registers, used to pass arguments to the error handling routine
+GR_Parameter_Y = r63
+GR_Parameter_RESULT = r64
+GR_Parameter_TAG = r65
+
+FR_X = f9 // f9 receives the normalized copy of the input f8 at exp_continue
+FR_Y = f9 // same register as FR_X
+FR_RESULT = f99
+
+.section .text
+.proc expm1l#
+.global expm1l#
+.align 64
+expm1l: // Entry point for expm1l: mark the expm1 variant, then branch to the code shared with expl
+#ifdef _LIBC
+.global __expm1l#
+__expm1l:
+#endif
+{ .mii
+alloc r32 = ar.pfs,0,30,4,0 // 0 inputs, 30 locals (r32-r61), 4 outputs (r62-r65), 0 rotating
+(p0) add r33 = 1, r0 // Flag = r33 = 1 selects expm1
+(p0) cmp.eq.unc p7, p0 = r0, r0 // r0 == r0 always holds, so p7 is set to mark the expm1 path
+}
+{ .mbb
+ nop.m 999
+(p0) br.cond.sptk exp_continue // Join the common body at the exp_continue label
+ nop.b 999 ;;
+}
+
+//
+// The first bundle above sets p7 true for expm1
+// and sets Flag = r33 = 1 for expm1
+//
+
+.endp expm1l
+ASM_SIZE_DIRECTIVE(expm1l)
+
+.section .text
+.proc expl#
+.global expl#
+.align 64
+expl:
+#ifdef _LIBC
+.global __ieee754_expl#
+__ieee754_expl:
+#endif
+{ .mii
+alloc r32 = ar.pfs,0,30,4,0
+(p0) add r33 = r0, r0 // Flag = r33 = 0 selects exp
+(p0) cmp.eq.unc p0, p7 = r0, r0 ;; // p7 = false for exp
+}
+exp_continue:
+{ .mfi
+(p0) add r32 = 2,r0 // GR_Expo_Range = 2
+(p0) fnorm.s1 f9 = f8 // f9 = normalized x
+ nop.i 0
+}
+{ .mfi
+(p0) nop.m 0
+//
+// At the expl entry above: p7 was set false
+// and Flag = r33 was set to 0
+//
+(p0) fclass.m.unc p6, p8 = f8, 0x1E7 // p6 = x is NatVal, NaN, Inf or zero
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p9, p0 = f8, 0x1FF // p9 = x is in none of the classes (unsupported)
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p0) mov f36 = f1 // FR_Scale = 1.0
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Identify NatVals, NaNs, Infs, and Zeros.
+// Identify EM unsupporteds.
+// Save special input registers
+(p0) mov f32 = f0 // FR_X_cor = 0.0
+//
+// Create FR_X_cor = 0.0
+// GR_Flag = 0
+// GR_Expo_Range = 2 (r32) for double-extended precision
+// FR_Scale = 1.0
+//
+(p6) br.cond.spnt EXPL_64_SPECIAL ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p9) br.cond.spnt EXPL_64_UNSUPPORTED ;;
+}
+{ .mfi
+(p0) cmp.ne.unc p12, p13 = 0x01, r33 // p12 = exp (Flag != 1), p13 = expm1 (Flag == 1)
+//
+// Branch out for special input values
+//
+(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0 // p9 = (x < 0); the compare runs in status field s0
+(p0) cmp.eq.unc p15, p0 = r0, r0 // Safe (p15) = True
+}
+{ .mmi
+ nop.m 999
+//
+// Raise possible denormal operand exception
+// Normalize x
+//
+// This function computes expl( x + x_cor)
+// Input FR 1: FR_X (f9)
+// Input FR 2: FR_X_cor (f32)
+// Input GR 1: GR_Flag (r33)
+// Input GR 2: GR_Expo_Range (r32)
+// Output FR 3: FR_Y_hi (f34)
+// Output FR 4: FR_Y_lo (f35)
+// Output FR 5: FR_Scale (f36)
+// Output PR 1: PR_Safe (p15)
+(p0) addl r34 = @ltoff(Constants_exp_64_Arg#),gp
+(p0) addl r40 = @ltoff(Constants_exp_64_W1#),gp
+};;
+//
+// Prepare to load constants
+// Set Safe = True
+//
+
+{ .mmi
+ ld8 r34 = [r34] // r34 = address of Constants_exp_64_Arg
+ ld8 r40 = [r40] // r40 = W1_ptr
+(p0) addl r41 = @ltoff(Constants_exp_64_W2#),gp
+};;
+
+{ .mmi
+(p0) ldfe f37 = [r34],16 // L_inv
+(p0) ld8 r41 = [r41] ;; // r41 = W2_ptr
+}
+
+//
+// N = fcvt.fx(float_N)
+// Set p14 if -6 > expo_X
+//
+//
+// Bias = 0x0FFFF
+// expo_X = expo_X and Mask
+//
+
+{ .mmi
+(p0) ldfe f40 = [r34],16 // L_hi
+ nop.m 999
+//
+// Load L_lo
+// Set p10 if 14 < expo_X
+//
+(p0) addl r50 = @ltoff(Constants_exp_64_T1#),gp
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p0) addl r51 = @ltoff(Constants_exp_64_T2#),gp ;;
+}
+//
+// Load W2_ptr
+// Branch to SMALL if expo_X < -6
+//
+
+{.mmi
+(p0) ld8 r50 = [r50] // r50 = T1_ptr
+(p0) ld8 r51 = [r51] // r51 = T2_ptr
+};;
+
+{ .mlx
+(p0) ldfe f41 = [r34],16 // L_lo
+//
+// float_N = X * L_Inv
+// expo_X = exponent of X
+// Mask = 0x1FFFF
+//
+(p0) movl r58 = 0x0FFFF // Bias
+}
+{ .mlx
+ nop.m 999
+(p0) movl r39 = 0x1FFFF ;; // Mask
+}
+{ .mmi
+(p0) getf.exp r37 = f9 // sign and exponent field of normalized x
+ nop.m 999
+(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp ;;
+}
+{ .mii
+(p0) ld8 r34 = [r34] // r34 = address of Constants_exp_64_Exponents
+ nop.i 999
+(p0) and r37 = r37, r39 ;; // expo_X = expo_X and Mask
+}
+{ .mmi
+(p0) sub r37 = r37, r58 ;; // expo_X = expo_X - Bias
+(p0) cmp.gt.unc p14, p0 = -6, r37 // p14 if -6 > expo_X
+(p0) cmp.lt.unc p10, p0 = 14, r37 ;; // p10 if 14 < expo_X
+}
+{ .mfi
+(p0) nop.m 0
+//
+// Load L_inv
+// Set p12 true for Flag = 0 (exp)
+// Set p13 true for Flag = 1 (expm1)
+//
+(p0) fmpy.s1 f38 = f9, f37 // float_N = X * L_inv
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Load L_hi
+// expo_X = expo_X - Bias
+// get W1_ptr
+//
+(p0) fcvt.fx.s1 f39 = f38 // N = fcvt.fx(float_N)
+(p14) br.cond.spnt EXPL_SMALL ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt EXPL_HUGE ;;
+}
+{ .mmi
+(p0) shladd r34 = r32,4,r34 // r34 += 16 * GR_Expo_Range
+ nop.m 999
+(p0) addl r35 = @ltoff(Constants_exp_64_A#),gp ;;
+}
+//
+// Load T_1,T_2
+//
+{ .mmi
+ nop.m 999
+ ld8 r35 =[r35] // r35 = address of Constants_exp_64_A
+ nop.i 99
+};;
+{ .mmb
+(p0) ldfe f51 = [r35],16 // A_3
+(p0) ld8 r45 = [r34],8 // big_expo_pos
+ nop.b 999 ;;
+}
+//
+// Set Safe = True if k >= big_expo_neg
+// Set Safe = False if k < big_expo_neg
+//
+{ .mmb
+(p0) ldfe f49 = [r35],16 // A_2
+(p0) ld8 r48 = [r34],0 // big_expo_neg
+ nop.b 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Branch to HUGE if expo_X > 14
+//
+(p0) fcvt.xf f38 = f39 // float_N = float(N)
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) getf.sig r52 = f39 // N_fix = significand of N
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+(p0) extr.u r43 = r52, 6, 6 ;; // M_1 = extr(N_fix,6,6)
+//
+// r = r - float_N * L_lo
+// K = extr(N_fix,12,52)
+//
+(p0) shladd r40 = r43,3,r40 ;; // W1_ptr + 8*M_1
+}
+{ .mfi
+(p0) shladd r50 = r43,2,r50 // T1_ptr + 4*M_1
+(p0) fnma.s1 f42 = f40, f38, f9 // r = X - float_N * L_hi
+//
+// float_N = float(N)
+// N_fix = significand N
+//
+(p0) extr.u r42 = r52, 0, 6 // M_2 = extr(N_fix,0,6)
+}
+{ .mmi
+(p0) ldfd f43 = [r40],0 ;; // W_1
+(p0) shladd r41 = r42,3,r41 // W2_ptr + 8*M_2
+(p0) shladd r51 = r42,2,r51 // T2_ptr + 4*M_2
+}
+//
+// W_1_p1 = 1 + W_1
+//
+{ .mmi
+(p0) ldfs f44 = [r50],0 ;; // T_1
+(p0) ldfd f45 = [r41],0 // W_2
+//
+// M_2 = extr(N_fix,0,6)
+// M_1 = extr(N_fix,6,6)
+// r = X - float_N * L_hi
+//
+(p0) extr r44 = r52, 12, 52 // K = extr(N_fix,12,52), sign-extended
+}
+{ .mmi
+(p0) ldfs f46 = [r51],0 ;; // T_2
+(p0) sub r46 = r58, r44 // Bias_m_K = Bias - K
+(p0) cmp.gt.unc p8, p15 = r44, r45 // Safe = False if K > big_expo_pos
+}
+//
+// W = W_1 + W_1_p1*W_2
+// Load A_2
+// Bias_m_K = Bias - K
+//
+{ .mii
+(p0) ldfe f40 = [r35],16 // A_1 (f40 is reused; L_hi is no longer needed)
+//
+// load A_1
+// poly = A_2 + r*A_3
+// rsq = r * r
+// neg_2_mK = exponent of Bias_m_k
+//
+(p0) add r47 = r58, r44 ;; // Bias_p_K = Bias + K
+//
+// Set Safe = True if k <= big_expo_pos
+// Set Safe = False if k > big_expo_pos
+// Load A_3
+//
+(p15) cmp.lt p8,p15 = r44,r48 ;; // if still Safe: Safe = False if K < big_expo_neg
+}
+{ .mmf
+(p0) setf.exp f61 = r46 // f61 gets exponent field Bias_m_K
+//
+// Bias_p + K = Bias + K
+// T = T_1 * T_2
+//
+(p0) setf.exp f36 = r47 // Scale gets exponent field Bias_p_K
+(p0) fnma.s1 f42 = f41, f38, f42 ;; // r = r - float_N * L_lo
+}
+{ .mfi
+ nop.m 999
+//
+// Load W_1,W_2
+// Load big_exp_pos, load big_exp_neg
+//
+(p0) fadd.s1 f47 = f43, f1 // W_1_p1 = 1 + W_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f52 = f42, f51, f49 // poly = A_2 + r*A_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 f48 = f42, f42 // rsq = r * r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 f53 = f44, f46 // T = T_1 * T_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f54 = f45, f47, f43 // W = W_1 + W_1_p1*W_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fneg f61 = f61 // neg_2_mK = -neg_2_mK
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 f52 = f42, f52, f40 // poly = A_1 + r*poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 f55 = f54, f1 // Wp1 = 1 + W
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// W + Wp1 * poly
+//
+(p0) mov f34 = f53 // Y_hi = T
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// A_1 + r * poly
+// Scale = setf_exp(Bias_p_k)
+//
+(p0) fma.s1 f52 = f48, f52, f42 // poly = r + rsq*poly
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly = r + rsq(A_1 + r*poly)
+// Wp1 = 1 + W
+// neg_2_mK = -neg_2_mK
+//
+(p0) fma.s1 f35 = f55, f52, f54 // W + Wp1*poly
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) fmpy.s1 f35 = f35, f53 // Y_lo = T * (W + Wp1*poly)
+//
+// Y_hi = T
+// Y_lo = T * (W + Wp1*poly)
+//
+(p12) br.cond.sptk EXPL_MAIN ;;
+}
+//
+// Branch if expl(x)
+// Continue for expm1l(x) = expl(x) - 1
+//
+{ .mii
+(p0) cmp.lt.unc p12, p13 = 10, r44
+ nop.i 999 ;;
+//
+// Set p12 if 10 < K, Else p13
+//
+(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
+}
+//
+// K > 10: Y_lo = Y_lo + neg_2_mK
+// K <=10: Set p13 if -10 > K, Else set p14
+//
+{ .mfi
+(p13) cmp.eq p15, p0 = r0, r0 // K < -10: Safe = True
+(p14) fadd.s1 f34 = f61, f34 // -10 <= K <= 10: Y_hi = Y_hi + neg_2_mK
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fadd.s1 f35 = f35, f61 // K > 10: Y_lo = Y_lo + neg_2_mK
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 f35 = f35, f34 // K < -10: Y_lo = Y_hi + Y_lo
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//
+// K <= 10 and K < -10, Set Safe = True
+// K <= 10 and K < -10, Y_lo = Y_hi + Y_lo
+// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
+//
+(p13) mov f34 = f61 // K < -10: Y_hi = neg_2_mK
+(p0) br.cond.sptk EXPL_MAIN ;;
+}
+EXPL_SMALL:
+{ .mmi
+ nop.m 999
+(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
+(p12) addl r35 = @ltoff(Constants_exp_64_P#),gp ;; // exp uses the P table
+}
+.pred.rel "mutex",p12,p13
+{ .mmi
+(p12) ld8 r35=[r35]
+nop.m 999
+(p13) addl r35 = @ltoff(Constants_exp_64_Q#),gp // expm1 uses the Q table
+};;
+{ .mmi
+(p13) ld8 r35=[r35]
+(p0) ld8 r34=[r34]
+nop.i 999
+};;
+{ .mfi
+(p0) add r34 = 0x48,r34 // entry at offset 0x48 of Constants_exp_64_Exponents
+//
+// Return
+// K <= 10 and K < -10, Y_hi = neg_2_mk
+//
+// /*******************************************************/
+// /*********** Branch EXPL_SMALL ************************/
+// /*******************************************************/
+(p0) mov f42 = f9 // r = X
+ nop.i 999 ;;
+}
+//
+// Flag = 0
+// r4 = rsq * rsq
+//
+{ .mfi
+(p0) ld8 r49 =[r34],0 // exponent threshold for the very-small test
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Flag = 1
+//
+(p0) cmp.lt.unc p14, p0 = r37, r49 ;; // p14 if expo_X < threshold
+}
+{ .mfi
+ nop.m 999
+//
+// r = X
+//
+(p0) fmpy.s1 f48 = f42, f42 // rsq = r * r
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// rsq = r * r
+//
+(p0) fmpy.s1 f50 = f48, f48 // r4 = rsq * rsq
+//
+// Is input very small?
+//
+(p14) br.cond.spnt EXPL_VERY_SMALL ;;
+}
+//
+// Flag_not1: Y_hi = 1.0
+// Flag is 1: r6 = rsq * r4
+//
+{ .mfi
+(p12) ldfe f52 = [r35],16
+(p12) mov f34 = f1 // Flag_not1: Y_hi = 1.0
+(p0) add r53 = 0x1,r0 ;;
+}
+{ .mfi
+(p13) ldfe f51 = [r35],16
+//
+// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
+//
+(p13) mov f34 = f9 // Flag_1: Y_hi = X
+ nop.i 999 ;;
+}
+{ .mmf
+(p12) ldfe f53 = [r35],16
+//
+// For Flag_1, Y_hi = X
+// Scale = 1
+// Create 0x000...01
+//
+(p0) setf.sig f37 = r53 // f37 significand = 0x000...01
+(p0) mov f36 = f1 ;; // Scale = 1
+}
+{ .mmi
+(p13) ldfe f52 = [r35],16 ;;
+(p12) ldfe f54 = [r35],16
+ nop.i 999 ;;
+}
+{ .mfi
+(p13) ldfe f53 = [r35],16
+(p13) fmpy.s1 f58 = f48, f50 // Flag_1: r6 = rsq * r4
+ nop.i 999 ;;
+}
+//
+// Flag_not1: poly_lo = P_5 + r*P_6
+// Flag_1: poly_lo = Q_6 + r*Q_7
+//
+{ .mmi
+(p13) ldfe f54 = [r35],16 ;;
+(p12) ldfe f55 = [r35],16
+ nop.i 999 ;;
+}
+{ .mmi
+(p12) ldfe f56 = [r35],16 ;;
+(p13) ldfe f55 = [r35],16
+ nop.i 999 ;;
+}
+{ .mmi
+(p12) ldfe f57 = [r35],0 ;; // last of six loads from the P table
+(p13) ldfe f56 = [r35],16
+ nop.i 999 ;;
+}
+{ .mfi
+(p13) ldfe f57 = [r35],0 // last of seven loads from the Q table
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For Flag_not_1, the P coefficients were loaded above
+// Else the Q coefficients were loaded above
+//
+(p12) fma.s1 f60 = f52, f42, f53 // Flag_not1: poly_lo = f53 + r*f52
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f60 = f51, f42, f52 // Flag_1: poly_lo = f52 + r*f51
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f60 = f60, f42, f54 // Flag_not1: poly_lo = f54 + r*poly_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f59 = f56, f42, f57 // Flag_not1: poly_hi = f57 + r*f56
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f60 = f42, f60, f53 // Flag_1: poly_lo = f53 + r*poly_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f59 = f59, f48, f42 // Flag_not1: poly_hi = r + rsq*poly_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7)
+// Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
+// Flag_not1: poly_hi = (P_1 + r*P_2)
+//
+(p13) fmpy.s1 f60 = f60, f58 // Flag_1: poly_lo = r6 * poly_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 f60 = f60, f42, f55 // Flag_not1: poly_lo = f55 + r*poly_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Flag_1: poly_lo = r6 *(Q_5 + ....)
+// Flag_not1: poly_hi = r + rsq *(P_1 + r*P_2)
+//
+(p12) fma.s1 f35 = f60, f50, f59 // Flag_not1: Y_lo = poly_hi + r4*poly_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fma.s1 f59 = f54, f42, f55 // Flag_1: poly_hi = f55 + r*f54
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Flag_not1: Y_lo = rsq* poly_hi + poly_lo
+// Flag_1: poly_lo = rsq* poly_hi + poly_lo
+//
+(p13) fma.s1 f59 = f59, f42, f56 // Flag_1: poly_hi = f56 + r*poly_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Flag_not_1: (P_1 + r*P_2)
+//
+(p13) fma.s1 f59 = f59, f42, f57 // Flag_1: poly_hi = f57 + r*poly_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2)
+//
+(p13) fma.s1 f35 = f59, f48, f60 // Flag_1: Y_lo = rsq*poly_hi + poly_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Create 0.000...01
+//
+(p0) for f37 = f35, f37 // "for" is the FP logical OR: f37 = Y_lo | 0x000...01
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Set lsb of Y_lo to 1
+//
+(p0) fmerge.se f35 = f35,f37 // sign/exponent of Y_lo, significand from f37
+(p0) br.cond.sptk EXPL_MAIN ;;
+}
+EXPL_VERY_SMALL:
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
+}
+{ .mfi
+ nop.m 999
+(p12) mov f35 = f9 // exp: Y_lo = x
+ nop.i 999 ;;
+}
+{ .mfb
+(p13) ld8 r34 = [r34]
+(p12) mov f34 = f1 // exp: Y_hi = 1.0
+(p12) br.cond.sptk EXPL_MAIN ;; // exp is done; only expm1 (p13) continues
+}
+{ .mlx
+(p13) add r34 = 8,r34 // skip to the negative entry
+(p13) movl r39 = 0x0FFFE ;; // Bias - 1: the exponent of 1/2
+}
+//
+// Load big_exp_neg
+// Create 1/2's exponent
+//
+{ .mii
+(p13) setf.exp f56 = r39 // f56 = 1/2
+(p13) shladd r34 = r32,4,r34 ;; // r34 += 16 * GR_Expo_Range
+ nop.i 999
+}
+//
+// Negative exponents are stored after positive
+//
+{ .mfi
+(p13) ld8 r45 = [r34],0 // big_exp_neg
+//
+// Y_hi = x
+// Scale = 1
+//
+(p13) fmpy.s1 f35 = f9, f9 // Y_lo = x * x
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Reset Safe if necessary
+// Create 1/2
+//
+(p13) mov f34 = f9 // Y_hi = x
+ nop.i 999 ;;
+}
+{ .mfi
+(p13) cmp.lt.unc p0, p15 = r37, r45 // Safe = False if expo_X < big_exp_neg
+(p13) mov f36 = f1 // Scale = 1
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Y_lo = x * x
+//
+(p13) fmpy.s1 f35 = f35, f56 // Y_lo = x*x/2
+//
+// Y_lo = x*x/2
+//
+(p13) br.cond.sptk EXPL_MAIN ;;
+}
+EXPL_HUGE:
+{ .mfi
+ nop.m 999
+(p0) fcmp.gt.unc.s1 p14, p0 = f9, f0 // p14 = (x > 0)
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl r39 = 0x15DC0 ;; // biased exponent used for the huge positive case
+}
+{ .mfi
+(p14) setf.exp f34 = r39 // x > 0: Y_hi
+(p14) mov f35 = f1 // x > 0: Y_lo = 1.0
+(p14) cmp.eq p0, p15 = r0, r0 ;; // x > 0: Safe = False
+}
+{ .mfb
+ nop.m 999
+(p14) mov f36 = f34 // x > 0: Scale = Y_hi
+//
+// If x > 0, Set Safe = False
+// If x > 0, Y_hi = 2**(24,000)
+// If x > 0, Y_lo = 1.0
+// If x > 0, Scale = 2**(24,000)
+//
+(p14) br.cond.sptk EXPL_MAIN ;;
+}
+{ .mlx
+ nop.m 999
+(p12) movl r39 = 0xA240 // x < 0, exp: biased exponent for Y_hi and Scale
+}
+{ .mlx
+ nop.m 999
+(p12) movl r38 = 0xA1DC ;; // x < 0, exp: biased exponent for Y_lo
+}
+{ .mmb
+(p13) cmp.eq p15, p14 = r0, r0 // x < 0, expm1: Safe = True
+(p12) setf.exp f34 = r39 // x < 0, exp: Y_hi
+ nop.b 999 ;;
+}
+{ .mlx
+(p12) setf.exp f35 = r38 // x < 0, exp: Y_lo
+(p13) movl r39 = 0xFF9C // x < 0, expm1: biased exponent for Y_lo
+}
+{ .mfi
+ nop.m 999
+(p13) fsub.s1 f34 = f0, f1 // x < 0, expm1: Y_hi = -1.0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) mov f36 = f34 // x < 0, exp: Scale = Y_hi
+(p12) cmp.eq p0, p15 = r0, r0 ;; // x < 0, exp: Safe = False
+}
+{ .mfi
+(p13) setf.exp f35 = r39 // x < 0, expm1: Y_lo
+(p13) mov f36 = f1 // x < 0, expm1: Scale = 1.0
+ nop.i 999 ;;
+}
+EXPL_MAIN:
+{ .mfi
+(p0) cmp.ne.unc p12, p0 = 0x01, r33 // p12 = exp (Flag != 1)
+(p0) fmpy.s1 f101 = f36, f35 // f101 = Scale * Y_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) fma.s0 f99 = f34, f36, f101 // result = Y_hi*Scale + Scale*Y_lo, user status s0
+(p15) br.cond.sptk EXPL_64_RETURN ;; // Safe: no overflow/underflow check needed
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x01 // configure s3 for the underflow probe
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl r50 = 0x00000000013FFF ;; // biased exponent of the overflow threshold
+}
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + RZ + TD (Underflows)
+//
+//
+// If (Safe) is true, then
+// Compute result using user supplied status field.
+// No overflow or underflow here, but perhaps inexact.
+// Return
+// Else
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE single, double,
+// double extended
+//
+{ .mfi
+(p0) setf.exp f60 = r50 // f60 = overflow threshold
+(p0) fma.s3 f102 = f34, f36, f101 // resultS3: same computation under s3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x40 // put s3 back
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// For Safe, no need to check for over/under.
+// For expm1, handle errors like exp.
+//
+(p0) fsetc.s2 0x7F,0x42 // configure s2 for the overflow probe
+ nop.i 999;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s2 f100 = f34, f36, f101 // resultS2: same computation under s2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x40 // put s2 back
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p12, p0 = f102, 0x00F // expm1 and resultS3 is zero/unorm
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = f102, 0x00F // resultS3 is zero/unorm
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fcmp.ge.unc.s1 p10, p0 = f100, f60 // expm1 and resultS2 >= threshold
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Create largest double exponent + 1.
+// Create smallest double exponent - 1.
+//
+(p0) fcmp.ge.unc.s1 p8, p0 = f100, f60 // resultS2 >= threshold
+ nop.i 999 ;;
+}
+//
+// fcmp: resultS2 >= + overflow threshold -> set (a) if true
+// fcmp: resultS2 <= - overflow threshold -> set (b) if true
+// fclass: resultS3 is denorm/unorm/0 -> set (d) if true
+//
+{ .mib
+(p10) mov GR_Parameter_TAG = 39 // tag used on the expm1 overflow path
+ nop.i 999
+(p10) br.cond.sptk __libm_error_region ;;
+}
+{ .mib
+(p8) mov GR_Parameter_TAG = 12 // tag used on the exp overflow path
+ nop.i 999
+(p8) br.cond.sptk __libm_error_region ;;
+}
+//
+// Report that exp overflowed
+//
+{ .mib
+(p12) mov GR_Parameter_TAG = 40 // tag used on the expm1 underflow path
+ nop.i 999
+(p12) br.cond.sptk __libm_error_region ;;
+}
+{ .mib
+(p11) mov GR_Parameter_TAG = 13 // tag used on the exp underflow path
+ nop.i 999
+(p11) br.cond.sptk __libm_error_region ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Report that exp underflowed
+//
+(p0) br.cond.sptk EXPL_64_RETURN ;; // neither overflow nor underflow detected
+}
+EXPL_64_SPECIAL:
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = f8, 0x0c3 // p6 = NaN (quiet or signaling, either sign)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p13, p8 = f8, 0x007 // p13 = +/-0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p14, p0 = f8, 0x007 // p14 = expm1 and +/-0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p12, p9 = f8, 0x021 // p12 = +Inf
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p11, p0 = f8, 0x022 // p11 = -Inf
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p10, p0 = f8, 0x022 // p10 = expm1 and -Inf
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Identify +/- 0, Inf, or -Inf
+// Generate the right kind of NaN.
+//
+(p13) fadd.s0 f99 = f0, f1 // +/-0: result = 1 (replaced below for expm1)
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) mov f99 = f8 // expm1 of +/-0: result = x
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p6) fadd.s0 f99 = f8, f1 // NaN: result = x + 1 in user status s0
+//
+// expl(+/-0) = 1
+// expm1l(+/-0) = +/-0
+// No exceptions raised
+//
+(p6) br.cond.sptk EXPL_64_RETURN ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p14) br.cond.sptk EXPL_64_RETURN ;;
+}
+{ .mfi
+ nop.m 999
+(p11) mov f99 = f0 // -Inf: result = 0 (replaced below for expm1)
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p10) fsub.s1 f99 = f0, f1 // expm1 of -Inf: result = -1
+//
+// expl(-Inf) = 0
+// expm1l(-Inf) = -1
+// No exceptions raised.
+//
+(p10) br.cond.sptk EXPL_64_RETURN ;;
+}
+{ .mfb
+ nop.m 999
+(p12) fmpy.s1 f99 = f8, f1 // +Inf: result = x
+//
+// expl(+Inf) = Inf
+// No exceptions raised.
+//
+(p0) br.cond.sptk EXPL_64_RETURN ;; // NOTE(review): a NatVal input matches no test above, so f99 looks unwritten here - confirm
+}
+EXPL_64_UNSUPPORTED:
+{ .mfb
+ nop.m 999
+(p0) fmpy.s0 f99 = f8, f0 // result = x * 0 in user status s0; presumably yields a NaN and flags invalid - confirm
+(p0) br.cond.sptk EXPL_64_RETURN ;;
+}
+EXPL_64_RETURN:
+{ .mfb
+ nop.m 999
+(p0) mov f8 = f99 // move FR_RESULT into the return register
+(p0) br.ret.sptk b0
+}
+.endp
+ASM_SIZE_DIRECTIVE(expl)
+
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp + 32, then advance to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack at sp + 48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller of expl/expm1l
+};;
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_floor.S b/sysdeps/ia64/fpu/s_floor.S
new file mode 100644
index 0000000..5a63a3c
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_floor.S
@@ -0,0 +1,227 @@
+.file "floor.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+.align 32
+.global floor#
+
+.section .text
+.proc floor#
+.align 32
+
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 3/22/00: Updated to improve performance
+// 6/13/00: Improved speed, fixed setting of inexact flag
+// 6/27/00: Eliminated incorrect invalid flag setting
+// 2/07/01: Corrected sign of zero result in round to -inf mode
+
+// API
+//==============================================================
+// double floor(double x)
+
+// general input registers:
+
+floor_GR_FFFF = r14 // all ones (-1); source of the FLOOR_FFFF significand
+floor_GR_signexp = r15 // sign and exponent field from getf.exp
+floor_GR_exponent = r16 // floor_GR_signexp masked with floor_GR_expmask
+floor_GR_expmask = r17 // 0x1FFFF
+floor_GR_bigexp = r18 // 0x10033: exponent at or above which x is already integral
+
+
+// predicate registers used:
+
+// p6 ==> Input is NaN, infinity, zero
+// p7 ==> Input is denormal
+// p8 ==> Input is <0
+// p9 ==> Input is >=0
+// p10 ==> Input exponent >= bigexp, so input is already an integer
+// p11 ==> Input is not a large integer
+// p12 ==> Input is below bigexp but equals its truncation (already an integer)
+// p13 ==> Input is not an integer, so inexact must be set
+
+
+// floating-point registers used:
+
+FLOOR_NORM_f8 = f9 // normalized copy of the input
+FLOOR_FFFF = f10 // operand squared to raise inexact
+FLOOR_INEXACT = f11 // scratch destination of the inexact-raising multiply
+FLOOR_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
+FLOOR_INT_f8 = f13 // trunc(x) as a fixed-point value
+FLOOR_adj = f14 // -1.0 for x < 0, 0.0 otherwise
+
+// Overview of operation
+//==============================================================
+
+// double floor(double x)
+// Return an integer value (represented as a double) that is the largest
+// value not greater than x
+// This is x rounded toward -infinity to an integral value.
+// Inexact is set if x != floor(x)
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is > 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is > 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+#include "libm_support.h"
+
+floor:
+#ifdef _LIBC
+.global __floor
+__floor:
+#endif
+
+{ .mfi
+ getf.exp floor_GR_signexp = f8
+ fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8 // trunc(x) as fixed point
+ addl floor_GR_bigexp = 0x10033, r0 // 0xFFFF + 0x34: |x| >= 2^52 is already integral
+}
+{ .mfi
+ addl floor_GR_FFFF = -1,r0
+ fcmp.lt.s1 p8,p9 = f8,f0 // p8 = (x < 0), p9 = otherwise
+ mov floor_GR_expmask = 0x1FFFF ;;
+}
+
+// p7 ==> denorm
+{ .mfi
+ setf.sig FLOOR_FFFF = floor_GR_FFFF
+ fclass.m p7,p0 = f8, 0x0b // unorm class, either sign
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FLOOR_NORM_f8 = f8
+ nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+ nop.m 999
+ fclass.m p6,p10 = f8, 0xe7 // p10 = none of NaN, Inf, zero
+(p7) br.cond.spnt L(FLOOR_DENORM) ;;
+}
+
+L(FLOOR_COMMON):
+.pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+// If x<0, adjustment is -1.0
+// If x>=0, adjustment is 0.0
+{ .mfi
+ and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
+(p8) fnma.s1 FLOOR_adj = f1,f1,f0 // -(1*1) + 0 = -1.0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FLOOR_adj = f0,f0 // 0.0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
+ nop.i 999
+}
+{ .mfi
+(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp // p10 = large, p11 = needs rounding
+(p6) fnorm.d f8 = f8 // NaN, Inf, zero: result is the input
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8 // trunc(x) back to floating point
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fnorm.d f8 = FLOOR_NORM_f8 // large input: result is the input
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p11) fadd.d f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj // result = trunc(x) + adjustment
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8 // p12 = x was integral, p13 = not
+ nop.i 999 ;;
+}
+
+// Set inexact if result not equal to input
+{ .mfi
+ nop.m 999
+(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF // product is discarded; done for the s0 flag
+ nop.i 999
+}
+// Set result to input if integer
+{ .mfb
+ nop.m 999
+(p12) fnorm.d f8 = FLOOR_NORM_f8
+ br.ret.sptk b0 ;;
+}
+
+// Here if input denorm
+L(FLOOR_DENORM):
+{ .mfb
+ getf.exp floor_GR_signexp = FLOOR_NORM_f8 // redo with the normalized input
+ fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
+ br.cond.sptk L(FLOOR_COMMON) ;;
+}
+
+.endp floor
+ASM_SIZE_DIRECTIVE(floor)
diff --git a/sysdeps/ia64/fpu/s_floorf.S b/sysdeps/ia64/fpu/s_floorf.S
new file mode 100644
index 0000000..92d58f1
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_floorf.S
@@ -0,0 +1,224 @@
+.file "floorf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+.align 32
+.global floorf#
+
+.section .text
+.proc floorf#
+.align 32
+
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+// 2/07/01: Corrected sign of zero result in round to -inf mode
+
+// API
+//==============================================================
+// float floorf(float x)
+
+// general input registers:
+
+floor_GR_FFFF = r14 // all ones (-1); source of the FLOOR_FFFF significand
+floor_GR_signexp = r15 // sign and exponent field from getf.exp
+floor_GR_exponent = r16 // floor_GR_signexp masked with floor_GR_expmask
+floor_GR_expmask = r17 // 0x1FFFF
+floor_GR_bigexp = r18 // 0x10016: exponent at or above which x is already integral
+
+
+// predicate registers used:
+
+// p6 ==> Input is NaN, infinity, zero
+// p7 ==> Input is denormal
+// p8 ==> Input is <0
+// p9 ==> Input is >=0
+// p10 ==> Input exponent >= bigexp, so input is already an integer
+// p11 ==> Input is not a large integer
+// p12 ==> Input is below bigexp but equals its truncation (already an integer)
+// p13 ==> Input is not an integer, so inexact must be set
+
+
+// floating-point registers used:
+
+FLOOR_NORM_f8 = f9 // normalized copy of the input
+FLOOR_FFFF = f10 // operand squared to raise inexact
+FLOOR_INEXACT = f11 // scratch destination of the inexact-raising multiply
+FLOOR_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
+FLOOR_INT_f8 = f13 // trunc(x) as a fixed-point value
+FLOOR_adj = f14 // -1.0 for x < 0, 0.0 otherwise
+
+// Overview of operation
+//==============================================================
+
+// float floorf(float x)
+// Return an integer value (represented as a float) that is the largest
+// value not greater than x
+// This is x rounded toward -infinity to an integral value.
+// Inexact is set if x != floorf(x)
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is > 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is > 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+#include "libm_support.h"
+
+floorf:
+#ifdef _LIBC
+.global __floorf
+__floorf:
+#endif
+
+{ .mfi
+ getf.exp floor_GR_signexp = f8
+ fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8 // trunc(x) as fixed point
+ addl floor_GR_bigexp = 0x10016, r0 // 0xFFFF + 0x17: |x| >= 2^23 is already integral
+}
+{ .mfi
+ addl floor_GR_FFFF = -1,r0
+ fcmp.lt.s1 p8,p9 = f8,f0 // p8 = (x < 0), p9 = otherwise
+ mov floor_GR_expmask = 0x1FFFF ;;
+}
+
+// p7 ==> denorm
+{ .mfi
+ setf.sig FLOOR_FFFF = floor_GR_FFFF
+ fclass.m p7,p0 = f8, 0x0b // unorm class, either sign
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FLOOR_NORM_f8 = f8
+ nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+ nop.m 999
+ fclass.m p6,p10 = f8, 0xe7 // p10 = none of NaN, Inf, zero
+(p7) br.cond.spnt L(FLOOR_DENORM) ;;
+}
+
+L(FLOOR_COMMON):
+.pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+// If x<0, adjustment is -1.0
+// If x>=0, adjustment is 0.0
+{ .mfi
+ and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
+(p8) fnma.s1 FLOOR_adj = f1,f1,f0 // -(1*1) + 0 = -1.0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FLOOR_adj = f0,f0 // 0.0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
+ nop.i 999
+}
+{ .mfi
+(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp // p10 = large, p11 = needs rounding
+(p6) fnorm.s f8 = f8 // NaN, Inf, zero: result is the input
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8 // trunc(x) back to floating point
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fnorm.s f8 = FLOOR_NORM_f8 // large input: result is the input
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p11) fadd.s f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj // result = trunc(x) + adjustment
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8 // p12 = x was integral, p13 = not
+ nop.i 999 ;;
+}
+
+// Set inexact if result not equal to input
+{ .mfi
+ nop.m 999
+(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF // product is discarded; done for the s0 flag
+ nop.i 999
+}
+// Set result to input if integer
+{ .mfb
+ nop.m 999
+(p12) fnorm.s f8 = FLOOR_NORM_f8
+ br.ret.sptk b0 ;;
+}
+
+// Here if input denorm
+L(FLOOR_DENORM):
+{ .mfb
+ getf.exp floor_GR_signexp = FLOOR_NORM_f8 // redo with the normalized input
+ fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
+ br.cond.sptk L(FLOOR_COMMON) ;;
+}
+
+.endp floorf
+ASM_SIZE_DIRECTIVE(floorf)
diff --git a/sysdeps/ia64/fpu/s_floorl.S b/sysdeps/ia64/fpu/s_floorl.S
new file mode 100644
index 0000000..241b2ef
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_floorl.S
@@ -0,0 +1,224 @@
+.file "floorl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+.align 32
+.global floorl#
+
+.section .text
+.proc floorl#
+.align 32
+
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+// 2/07/01: Corrected sign of zero result in round to -inf mode
+
+// API
+//==============================================================
+// long double floorl(long double x)
+
+// general registers used as scratch (each is written before it is read):
+
+floor_GR_FFFF = r14 // all ones (-1); copied into the significand of FLOOR_FFFF
+floor_GR_signexp = r15 // result of getf.exp on x (or on normalized x if denormal)
+floor_GR_exponent = r16 // floor_GR_signexp ANDed with floor_GR_expmask
+floor_GR_expmask = r17 // 0x1FFFF
+floor_GR_bigexp = r18 // 0x1003e; exponents >= this mean x is already integral
+
+
+// predicate registers used:
+
+// p6 ==> Input is NaN, infinity, zero
+// p7 ==> Input is denormal
+// p8 ==> Input is <0
+// p9 ==> Input is not <0
+// p10 ==> Input exponent >= floor_GR_bigexp, so input is already an integer
+// p11 ==> Input exponent < floor_GR_bigexp, so input may have a fraction
+// p12 ==> trunc(input) == input (input is a smaller integer)
+// p13 ==> trunc(input) != input (not an integer), so inexact must be set
+
+
+// floating-point registers used:
+
+FLOOR_NORM_f8 = f9 // x after fnorm.s1
+FLOOR_FFFF = f10 // significand of all ones, multiplied by itself to raise inexact
+FLOOR_INEXACT = f11 // destination of that multiply; never read in this routine
+FLOOR_FLOAT_INT_f8 = f12 // trunc(x) converted back to floating point
+FLOOR_INT_f8 = f13 // trunc(x) as an integer (fcvt.fx.trunc)
+FLOOR_adj = f14 // -1.0 if x<0, else 0.0; added to trunc(x)
+
+// Overview of operation
+//==============================================================
+
+// long double floorl(long double x)
+// Return an integer value (represented as a long double) that is the largest
+// value not greater than x
+// This is x rounded toward -infinity to an integral value.
+// Inexact is set if x != floorl(x)
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+#include "libm_support.h"
+
+floorl:
+#ifdef _LIBC
+.global __floorl
+__floorl:
+#endif
+// Entry: x is read from f8; the result is written back to f8.
+{ .mfi
+ getf.exp floor_GR_signexp = f8 // sign and exponent bits of x
+ fcvt.fx.trunc.s1 FLOOR_INT_f8 = f8 // trunc(x) as an integer
+ addl floor_GR_bigexp = 0x1003e, r0 // threshold exponent (true exponent 63, see overview)
+}
+{ .mfi
+ addl floor_GR_FFFF = -1,r0 // all ones
+ fcmp.lt.s1 p8,p9 = f8,f0 // p8: x<0, p9: otherwise
+ mov floor_GR_expmask = 0x1FFFF ;; // keeps the low 17 bits of signexp
+}
+
+// p7 ==> denorm (fclass mask 0x0b)
+{ .mfi
+ setf.sig FLOOR_FFFF = floor_GR_FFFF // significand of all ones, used below to raise inexact
+ fclass.m p7,p0 = f8, 0x0b
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FLOOR_NORM_f8 = f8 // normalized copy of x
+ nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO (fclass mask 0xe7); p10 ==> everything else
+{ .mfb
+ nop.m 999
+ fclass.m p6,p10 = f8, 0xe7
+(p7) br.cond.spnt L(FLOOR_DENORM) ;; // denormal: redo getf.exp/fcvt on the normalized copy
+}
+
+L(FLOOR_COMMON):
+.pred.rel "mutex",p8,p9
+// Set adjustment that is added to trunc(x) to form the result
+// If x<0, adjustment is -1.0
+// If x>=0, adjustment is 0.0
+{ .mfi
+ and floor_GR_exponent = floor_GR_signexp, floor_GR_expmask // low 17 bits only
+(p8) fnma.s1 FLOOR_adj = f1,f1,f0 // -(1.0*1.0)+0.0 = -1.0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FLOOR_adj = f0,f0 // 0.0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
+ nop.i 999
+}
+{ .mfi
+(p10) cmp.ge.unc p10,p11 = floor_GR_exponent, floor_GR_bigexp // p10: already integral, p11: may have a fraction
+(p6) fnorm f8 = f8 // NaN/Inf/zero: result is x passed through fnorm
+ nop.i 999 ;;
+}
+// p11 path: convert trunc(x) back to floating point
+{ .mfi
+ nop.m 999
+(p11) fcvt.xf FLOOR_FLOAT_INT_f8 = FLOOR_INT_f8
+ nop.i 999 ;;
+}
+// p10 path: x is already integral, result is the normalized input
+{ .mfi
+ nop.m 999
+(p10) fnorm f8 = FLOOR_NORM_f8
+ nop.i 999 ;;
+}
+// p11 path: tentative result trunc(x) + adjustment
+{ .mfi
+ nop.m 999
+(p11) fadd f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.eq.unc.s1 p12,p13 = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8 // p12: trunc(x)==x, p13: trunc(x)!=x; both cleared if p11 is false
+ nop.i 999 ;;
+}
+
+// Set inexact if result not equal to input
+{ .mfi
+ nop.m 999
+(p13) fmpy.s0 FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF // product is only computed for its s0 flag effect
+ nop.i 999
+}
+// Set result to input if integer
+{ .mfb
+ nop.m 999
+(p12) fnorm f8 = FLOOR_NORM_f8 // overrides the tentative trunc(x)+adjustment when x was integral
+ br.ret.sptk b0 ;;
+}
+
+// Here if input denorm
+L(FLOOR_DENORM):
+{ .mfb
+ getf.exp floor_GR_signexp = FLOOR_NORM_f8 // recompute from the normalized copy
+ fcvt.fx.trunc.s1 FLOOR_INT_f8 = FLOOR_NORM_f8
+ br.cond.sptk L(FLOOR_COMMON) ;;
+}
+
+.endp floorl
+ASM_SIZE_DIRECTIVE(floorl)
diff --git a/sysdeps/ia64/fpu/s_frexp.c b/sysdeps/ia64/fpu/s_frexp.c
new file mode 100644
index 0000000..752a9ee
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_frexp.c
@@ -0,0 +1,44 @@
+//
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+//
+
+#include "libm_support.h"
+
+// frexp: thin entry point that forwards both arguments to a libm helper.
+// SIZE_INT_64 selects __libm_frexp_8; failing that, SIZE_INT_32 selects
+// __libm_frexp_4 (suffix presumably sizeof(int) in bytes -- TODO confirm).
+double frexp(double x, int *y)
+{
+#if defined(SIZE_INT_64)
+
+ return __libm_frexp_8(x, y);
+
+#elif defined(SIZE_INT_32)
+
+ return __libm_frexp_4(x, y);
+
+#endif // NOTE(review): with neither macro defined no return is compiled
+}
diff --git a/sysdeps/ia64/fpu/s_frexpf.c b/sysdeps/ia64/fpu/s_frexpf.c
new file mode 100644
index 0000000..9bbe51d
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_frexpf.c
@@ -0,0 +1,44 @@
+//
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+//
+
+#include "libm_support.h"
+
+// frexpf: thin entry point that forwards both arguments to a libm helper.
+// SIZE_INT_64 selects __libm_frexp_8f; failing that, SIZE_INT_32 selects
+// __libm_frexp_4f (digit presumably sizeof(int) in bytes -- TODO confirm).
+float frexpf(float x, int *y)
+{
+#if defined(SIZE_INT_64)
+
+ return __libm_frexp_8f(x, y);
+
+#elif defined(SIZE_INT_32)
+
+ return __libm_frexp_4f(x, y);
+
+#endif // NOTE(review): with neither macro defined no return is compiled
+}
diff --git a/sysdeps/ia64/fpu/s_frexpl.c b/sysdeps/ia64/fpu/s_frexpl.c
new file mode 100644
index 0000000..b85a779
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_frexpl.c
@@ -0,0 +1,44 @@
+//
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+//
+
+#include "libm_support.h"
+
+// frexpl: thin entry point that forwards both arguments to a libm helper.
+// SIZE_INT_64 selects __libm_frexp_8l; failing that, SIZE_INT_32 selects
+// __libm_frexp_4l (digit presumably sizeof(int) in bytes -- TODO confirm).
+long double frexpl(long double x, int *y)
+{
+#if defined(SIZE_INT_64)
+
+ return __libm_frexp_8l(x, y);
+
+#elif defined(SIZE_INT_32)
+
+ return __libm_frexp_4l(x, y);
+
+#endif // NOTE(review): with neither macro defined no return is compiled
+}
diff --git a/sysdeps/ia64/fpu/s_ilogb.S b/sysdeps/ia64/fpu/s_ilogb.S
new file mode 100644
index 0000000..d860ace
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ilogb.S
@@ -0,0 +1,240 @@
+.file "ilogb.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/03/00 Initial version
+// 5/26/00 Fix bug when x a double-extended denormal;
+// if x=0 call error routine, per C9X
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 1/20/01 Fixed result for x=0, corrected error tag value.
+
+.align 32
+.global ilogb#
+
+.section .text
+.proc ilogb#
+.align 32
+
+// API
+//==============================================================
+// int = ilogb(double)
+
+// Overview of operation
+//==============================================================
+// ilogb computes log2(x) as an int
+// and returns it in r8
+
+// ilogb is similar to logb but differs in the following ways:
+// +-inf
+// ilogb: returns INT_MAX
+// logb: returns +inf
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogb: returns INT_MAX (7fffffff)
+// logb: returns QNAN (quieted SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogb: returns INT_MIN (80000000)
+// logb: returns -inf
+
+// Registers used
+//==============================================================
+
+// general local registers:
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+
+// predicate registers used:
+// p6 - x 0
+// p7 - x nan, inf
+// p8 - x norm, unorm
+// p9 - x unorm
+
+// floating-point registers used:
+// f8 - f10
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r32 // also the destination of the alloc at entry
+GR_SAVE_B0 = r34 // NOTE: r34 also holds the exponent bias (0xffff) on the non-error path
+GR_SAVE_GP = r35 // NOTE: r35 also holds the exponent mask (0x1ffff) on the non-error path
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG = r41 // error tag, set at L(ILOGB_ZERO)
+
+FR_X = f8 // the argument x
+FR_Y = f0 // f0 is what gets stored in the Parameter 2 slot
+FR_RESULT = f0 // f0 is what gets stored in the Parameter 3 slot
+
+// Entry: x is read from f8; the int result is returned in r8.
+ilogb:
+
+// Form signexp of 2^64 in case need to scale denormal
+{ .mmf
+ alloc r32=ar.pfs,1,5,4,0
+(p0) mov r37 = 0x1003f // 0xffff + 64
+(p0) fnorm f9 = f8 ;; // f9 = normalized copy of x
+}
+
+// Form 2^64 in case need to scale denormal
+{ .mfi
+(p0) setf.exp f10 = r37
+(p0) fclass.m.unc p7, p8 = f8, 0xe3 // p7: NaN or Inf, p8: everything else
+(p0) mov r34 = 0xffff ;; // exponent bias, subtracted at the end
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X ZERO, returns INT_MIN
+// X INF or NAN, returns INT_MAX
+// Second fclass (mask 0x07) splits p8 into p6: zero and p8: nonzero finite
+{ .mfi
+(p0) mov r35 = 0x1ffff // mask that keeps the low 17 bits of getf.exp
+(p8) fclass.m.unc p6, p8 = f8, 0x07
+ nop.i 999 ;;
+}
+{ .mlx
+ nop.m 999
+(p7) movl r8 = 0x000000007fffffff ;; // NaN/Inf result: INT_MAX
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(ILOGB_ZERO) ;;
+}
+
+// Test for denormal: f9 is still unnormal after the fnorm above
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p9, p0 = f9, 0x0b
+ nop.i 999 ;;
+}
+
+L(ILOGB_COMMON):
+// X NORMAL returns true exponent
+{ .mmi
+ nop.m 999
+(p8) getf.exp r33 = f9 // sign and biased exponent of f9
+ nop.i 999 ;;
+}
+
+// If denormal add 64 to exponent bias for scaling
+{ .mfb
+(p9) add r34 = 64, r34
+ nop.f 999
+(p9) br.cond.spnt L(ILOGB_DENORM) ;;
+}
+
+{ .mmi
+(p8) and r36 = r35, r33 // keep the low 17 bits
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mib
+(p8) sub r8 = r36, r34 // masked exponent minus bias = result
+ nop.i 999
+(p0) br.ret.sptk b0 ;; // NaN/Inf (p8 clear) also returns here, r8 = INT_MAX
+}
+
+L(ILOGB_DENORM):
+// Here if x denormal
+// Form x * 2^64 which is normal
+// Return to common code
+{ .mfb
+ cmp.eq p8,p9 = r0,r0 // p8=1, p9=0 so the second pass does not branch here again
+ fmpy f9 = f9, f10
+ br.cond.sptk L(ILOGB_COMMON) ;;
+}
+
+// X ZERO
+// return INT_MIN, call error support (falls through into __libm_error_region)
+L(ILOGB_ZERO):
+{.mlx
+ mov GR_Parameter_TAG = 157 // error tag for __libm_error_support
+(p6) movl r33 = 0x0000000080000000 ;; // INT_MIN, copied to r8 in __libm_error_region
+};;
+.endp ilogb
+ASM_SIZE_DIRECTIVE(ilogb)
+
+.proc __libm_error_region
+__libm_error_region: // reached by fall-through from L(ILOGB_ZERO)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (formed from sp before the frame is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack, then advance the pointer by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-form Parameter 3 address after the call (see 8/15/00 history note)
+};;
+{ .mmi
+ mov r8 = r33 // Return value: INT_MIN placed in r33 at L(ILOGB_ZERO)
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ilogbf.S b/sysdeps/ia64/fpu/s_ilogbf.S
new file mode 100644
index 0000000..0fb4d45
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ilogbf.S
@@ -0,0 +1,240 @@
+.file "ilogbf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/03/00 Initial version
+// 5/26/00 Fix bug when x a double-extended denormal;
+// if x=0 call error routine, per C9X
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 1/20/01 Fixed result for x=0
+
+.align 32
+.global ilogbf#
+
+.section .text
+.proc ilogbf#
+.align 32
+
+// API
+//==============================================================
+// int = ilogbf(float)
+
+// Overview of operation
+//==============================================================
+// ilogbf computes log2(x) as an int
+// and returns it in r8
+
+// ilogbf is similar to logbf but differs in the following ways:
+// +-inf
+// ilogbf: returns INT_MAX
+// logbf: returns +inf
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogbf: returns INT_MAX (7fffffff)
+// logbf: returns QNAN (quieted SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogbf: returns INT_MIN (80000000)
+// logbf: returns -inf
+
+// Registers used
+//==============================================================
+
+// general local registers:
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+
+// predicate registers used:
+// p6 - x 0
+// p7 - x nan, inf
+// p8 - x norm, unorm
+// p9 - x unorm
+
+// floating-point registers used:
+// f8 - f10
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r32 // also the destination of the alloc at entry
+GR_SAVE_B0 = r34 // NOTE: r34 also holds the exponent bias (0xffff) on the non-error path
+GR_SAVE_GP = r35 // NOTE: r35 also holds the exponent mask (0x1ffff) on the non-error path
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG = r41 // error tag, set at L(ILOGB_ZERO)
+
+FR_X = f8 // the argument x
+FR_Y = f0 // f0 is what gets stored in the Parameter 2 slot
+FR_RESULT = f0 // f0 is what gets stored in the Parameter 3 slot
+
+// Entry: x is read from f8; the int result is returned in r8.
+ilogbf:
+
+// Form signexp of 2^64 in case need to scale denormal
+{ .mmf
+ alloc r32=ar.pfs,1,5,4,0
+(p0) mov r37 = 0x1003f // 0xffff + 64
+(p0) fnorm f9 = f8 ;; // f9 = normalized copy of x
+}
+
+// Form 2^64 in case need to scale denormal
+{ .mfi
+(p0) setf.exp f10 = r37
+(p0) fclass.m.unc p7, p8 = f8, 0xe3 // p7: NaN or Inf, p8: everything else
+(p0) mov r34 = 0xffff ;; // exponent bias, subtracted at the end
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X ZERO, returns INT_MIN
+// X INF or NAN, returns INT_MAX
+// Second fclass (mask 0x07) splits p8 into p6: zero and p8: nonzero finite
+{ .mfi
+(p0) mov r35 = 0x1ffff // mask that keeps the low 17 bits of getf.exp
+(p8) fclass.m.unc p6, p8 = f8, 0x07
+ nop.i 999 ;;
+}
+{ .mlx
+ nop.m 999
+(p7) movl r8 = 0x000000007fffffff ;; // NaN/Inf result: INT_MAX
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(ILOGB_ZERO) ;;
+}
+
+// Test for denormal: f9 is still unnormal after the fnorm above
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p9, p0 = f9, 0x0b
+ nop.i 999 ;;
+}
+
+L(ILOGB_COMMON):
+// X NORMAL returns true exponent
+{ .mmi
+ nop.m 999
+(p8) getf.exp r33 = f9 // sign and biased exponent of f9
+ nop.i 999 ;;
+}
+
+// If denormal add 64 to exponent bias for scaling
+{ .mfb
+(p9) add r34 = 64, r34
+ nop.f 999
+(p9) br.cond.spnt L(ILOGB_DENORM) ;;
+}
+
+{ .mmi
+(p8) and r36 = r35, r33 // keep the low 17 bits
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mib
+(p8) sub r8 = r36, r34 // masked exponent minus bias = result
+ nop.i 999
+(p0) br.ret.sptk b0 ;; // NaN/Inf (p8 clear) also returns here, r8 = INT_MAX
+}
+
+L(ILOGB_DENORM):
+// Here if x denormal
+// Form x * 2^64 which is normal
+// Return to common code
+{ .mfb
+ cmp.eq p8,p9 = r0,r0 // p8=1, p9=0 so the second pass does not branch here again
+ fmpy f9 = f9, f10
+ br.cond.sptk L(ILOGB_COMMON) ;;
+}
+
+// X ZERO
+// return INT_MIN, call error support (falls through into __libm_error_region)
+L(ILOGB_ZERO):
+{.mlx
+ mov GR_Parameter_TAG = 158 // error tag for __libm_error_support
+(p6) movl r33 = 0x0000000080000000 ;; // INT_MIN, copied to r8 in __libm_error_region
+};;
+.endp ilogbf
+ASM_SIZE_DIRECTIVE(ilogbf)
+
+.proc __libm_error_region
+__libm_error_region: // reached by fall-through from L(ILOGB_ZERO)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (formed from sp before the frame is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack, then advance the pointer by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-form Parameter 3 address after the call (see 8/15/00 history note)
+};;
+{ .mmi
+ mov r8 = r33 // Return value: INT_MIN placed in r33 at L(ILOGB_ZERO)
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ilogbl.S b/sysdeps/ia64/fpu/s_ilogbl.S
new file mode 100644
index 0000000..4c67d49
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ilogbl.S
@@ -0,0 +1,240 @@
+.file "ilogbl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/03/00 Initial version
+// 5/26/00 Fix bug when x a double-extended denormal;
+// if x=0 call error routine, per C9X
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 1/20/01 Fixed result for x=0
+
+.align 32
+.global ilogbl#
+
+.section .text
+.proc ilogbl#
+.align 32
+
+// API
+//==============================================================
+// int = ilogbl(double_extended)
+
+// Overview of operation
+//==============================================================
+// ilogbl computes log2(x) as an int
+// and returns it in r8
+
+// ilogbl is similar to logbl but differs in the following ways:
+// +-inf
+// ilogbl: returns INT_MAX
+// logbl: returns +inf
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogbl: returns INT_MAX (7fffffff)
+// logbl: returns QNAN (quieted SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogbl: returns INT_MIN (80000000)
+// logbl: returns -inf
+
+// Registers used
+//==============================================================
+
+// general local registers:
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+
+// predicate registers used:
+// p6 - x 0
+// p7 - x nan, inf
+// p8 - x norm, unorm
+// p9 - x unorm
+
+// floating-point registers used:
+// f8 - f10
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r32 // also the destination of the alloc at entry
+GR_SAVE_B0 = r34 // NOTE: r34 also holds the exponent bias (0xffff) on the non-error path
+GR_SAVE_GP = r35 // NOTE: r35 also holds the exponent mask (0x1ffff) on the non-error path
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG = r41 // error tag, set at L(ILOGB_ZERO)
+
+FR_X = f8 // the argument x
+FR_Y = f0 // f0 is what gets stored in the Parameter 2 slot
+FR_RESULT = f0 // f0 is what gets stored in the Parameter 3 slot
+
+// Entry: x is read from f8; the int result is returned in r8.
+ilogbl:
+
+// Form signexp of 2^64 in case need to scale denormal
+{ .mmf
+ alloc r32=ar.pfs,1,5,4,0
+(p0) mov r37 = 0x1003f // 0xffff + 64
+(p0) fnorm f9 = f8 ;; // f9 = normalized copy of x
+}
+
+// Form 2^64 in case need to scale denormal
+{ .mfi
+(p0) setf.exp f10 = r37
+(p0) fclass.m.unc p7, p8 = f8, 0xe3 // p7: NaN or Inf, p8: everything else
+(p0) mov r34 = 0xffff ;; // exponent bias, subtracted at the end
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X ZERO, returns INT_MIN
+// X INF or NAN, returns INT_MAX
+// Second fclass (mask 0x07) splits p8 into p6: zero and p8: nonzero finite
+{ .mfi
+(p0) mov r35 = 0x1ffff // mask that keeps the low 17 bits of getf.exp
+(p8) fclass.m.unc p6, p8 = f8, 0x07
+ nop.i 999 ;;
+}
+{ .mlx
+ nop.m 999
+(p7) movl r8 = 0x000000007fffffff ;; // NaN/Inf result: INT_MAX
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p6) br.cond.spnt L(ILOGB_ZERO) ;;
+}
+
+// Test for denormal: f9 is still unnormal after the fnorm above
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p9, p0 = f9, 0x0b
+ nop.i 999 ;;
+}
+
+L(ILOGB_COMMON):
+// X NORMAL returns true exponent
+{ .mmi
+ nop.m 999
+(p8) getf.exp r33 = f9 // sign and biased exponent of f9
+ nop.i 999 ;;
+}
+
+// If denormal add 64 to exponent bias for scaling
+{ .mfb
+(p9) add r34 = 64, r34
+ nop.f 999
+(p9) br.cond.spnt L(ILOGB_DENORM) ;;
+}
+
+{ .mmi
+(p8) and r36 = r35, r33 // keep the low 17 bits
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mib
+(p8) sub r8 = r36, r34 // masked exponent minus bias = result
+ nop.i 999
+(p0) br.ret.sptk b0 ;; // NaN/Inf (p8 clear) also returns here, r8 = INT_MAX
+}
+
+L(ILOGB_DENORM):
+// Here if x denormal
+// Form x * 2^64 which is normal
+// Return to common code
+{ .mfb
+ cmp.eq p8,p9 = r0,r0 // p8=1, p9=0 so the second pass does not branch here again
+ fmpy f9 = f9, f10
+ br.cond.sptk L(ILOGB_COMMON) ;;
+}
+
+// X ZERO
+// return INT_MIN, call error support (falls through into __libm_error_region)
+L(ILOGB_ZERO):
+{.mlx
+ mov GR_Parameter_TAG = 156 // error tag for __libm_error_support
+(p6) movl r33 = 0x0000000080000000 ;; // INT_MIN, copied to r8 in __libm_error_region
+};;
+.endp ilogbl
+ASM_SIZE_DIRECTIVE(ilogbl)
+
+.proc __libm_error_region
+__libm_error_region: // reached by fall-through from L(ILOGB_ZERO)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (formed from sp before the frame is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack, then advance the pointer by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-form Parameter 3 address after the call (see 8/15/00 history note)
+};;
+{ .mmi
+ mov r8 = r33 // Return value: INT_MIN placed in r33 at L(ILOGB_ZERO)
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexp.S b/sysdeps/ia64/fpu/s_ldexp.S
new file mode 100644
index 0000000..73bd2f4
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ldexp.S
@@ -0,0 +1,367 @@
+.file "ldexp.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 ldexp completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double = ldexp (double x, int n)
+// input floating point f8 and int n (r33)
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Big = f6 // first 2**40000 (clamp for 2**n), later the +2**1024 overflow bound
+FR_NBig = f7 // first 2**-40000 (clamp for 2**n), later the -2**1024 overflow bound
+FR_Floating_X = f8 // input x
+FR_Result = f8 // result, same register as the input
+FR_Result2 = f9 // product computed under status field s2
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // fnorm of x
+FR_Two_N = f14 // scale factor 2**n
+FR_Two_to_Big = f15 // not referenced anywhere else in this file
+
+GR_N_Biased = r15 // n + 0xFFFF
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18
+GR_Scratch1 = r19
+GR_Bias = r20 // 0xFFFF
+GR_N_as_int = r21 // n sign-extended from 32 bits
+
+GR_SAVE_B0 = r32
+GR_SAVE_GP = r33 // r33 also carries n on entry; n is copied to r21 before this use
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35 // r35-r38 are the four output registers of the alloc frame
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.align 32
+.global ldexp
+
+.section .text
+.proc ldexp
+.align 32
+// ldexp(x, n): x arrives in f8 and n in r33; x * 2**n is returned in f8.
+ldexp:
+// Out-of-range results branch to the LDEXP_OVERFLOW / LDEXP_UNDERFLOW labels.
+//
+// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
+//
+{ .mfi
+ alloc r32=ar.pfs,1,2,4,0 // r32 = previous ar.pfs; frame r32-r34 plus outputs r35-r38
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Bias = 0x0FFFF,r0
+}
+
+//
+// Sign extend input
+// Is N zero?
+// Normalize x
+//
+{ .mfi
+ cmp.eq.unc p6,p0 = r33,r0 // p6: n == 0 (compares all 64 bits of r33)
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ sxt4 GR_N_as_int = r33 // n is a 32-bit int; widen it to 64 bits
+}
+;;
+
+//
+// Compute the biased exponent N + 0xFFFF
+// Branch and return special values.
+// Create -35000
+// Create 35000
+//
+{ .mfi
+ addl GR_Big = 35000,r0
+ nop.f 0
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{ .mfb
+ addl GR_NBig = -35000,r0
+(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0 // special x: return x*1+0 in double precision
+(p7) br.ret.spnt b0
+};;
+
+//
+// Build 2**N from the biased exponent
+// Return x when N = 0
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+ addl GR_Scratch1 = 0x063BF,r0 // 0x063BF - 0xFFFF = -40000
+}
+{ .mfb
+ addl GR_Scratch = 0x019C3F,r0 // 0x19C3F - 0xFFFF = +40000
+(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
+(p6) br.ret.spnt b0
+};;
+
+//
+// Create 2**big (exponent +40000, into FR_Big)
+// Create 2**-big (exponent -40000, into FR_NBig)
+// Is N >= 35000
+// Is N <= -35000
+// Raise Denormal operand flag with compare
+// (2**N itself was already built in the previous group)
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch1
+ nop.f 0
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+}
+{ .mfi
+ setf.exp FR_Big = GR_Scratch
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 // p11 is not read anywhere in this file
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 // n >= 35000: scale by 2**40000 instead
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x00000000000303FF // sign bit 0x20000 + exponent 0x103FF: -2**1024
+};;
+
+
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 // n <= -35000: scale by 2**-40000 instead
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x00000000000103FF // exponent 0x103FF - 0xFFFF = 1024: +2**1024
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41 // 0x41 = FZ + TD
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42 // 0x42 = WRE + TD
+ nop.i 999
+};;
+
+//
+// Do final operation
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch // FR_NBig becomes the negative overflow bound
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 // value returned on the main path
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 // same product under s3, tested for zero below
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1 // FR_Big becomes the positive overflow bound
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 // same product under s2, compared with the bounds below
+ nop.i 999
+};;
+
+//
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40 // s0 controls with only TD added
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // s0 controls with only TD added
+ nop.i 999
+};;
+
+//
+// Is the result zero?
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007 // p6: the s3 (flush-to-zero) result is +-0
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 146, r0 // tag passed on if an overflow branch is taken
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big // p7: result >= +2**1024, p8: otherwise
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // keep p6 only if the s0 and s2 results differ
+ nop.i 999
+};;
+
+//
+// Is the result at or below the negative overflow bound?
+// Branch out for underflow
+//
+{ .mfb
+(p6) addl GR_Tag = 147, r0 // tag passed on for the underflow branch
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9: result <= -2**1024
+(p6) br.cond.spnt L(LDEXP_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(LDEXP_OVERFLOW)
+(p9) br.cond.spnt L(LDEXP_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+.endp ldexp
+ASM_SIZE_DIRECTIVE(ldexp)
+.proc __libm_error_region
+__libm_error_region:
+// Shared overflow/underflow exit of ldexp; GR_Tag was set before the branch here.
+L(LDEXP_OVERFLOW):
+L(LDEXP_UNDERFLOW):
+
+//
+// Get stack address of N (old sp-32, which is new sp+32 after the adjust below)
+//
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp (allocate a 64-byte frame)
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ st8 [GR_Parameter_Y] = GR_N_as_int,16 // N at sp+32; pointer then advances to sp+48
+ add GR_Parameter_X = 16,sp // x goes to sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack.
+//
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_Norm_X // the normalized x is what gets passed
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // result slot at sp+48
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_Result // store reads the old pointer value (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the address of N (sp+32)
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack (recomputed after the call)
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfd FR_Result = [GR_Parameter_RESULT] // reload the slot that was stored before the call
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexpf.S b/sysdeps/ia64/fpu/s_ldexpf.S
new file mode 100644
index 0000000..07f750d
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ldexpf.S
@@ -0,0 +1,366 @@
+//.file "ldexpf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 ldexpf completely reworked and now standalone version
+//
+// API
+//==============================================================
+// float = ldexpf (float x, int n)
+// input floating point f8 and int n (r33)
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Big = f6 // first 2**40000 (clamp for 2**n), later the +2**128 overflow bound
+FR_NBig = f7 // first 2**-40000 (clamp for 2**n), later the -2**128 overflow bound
+FR_Floating_X = f8 // input x
+FR_Result = f8 // result, same register as the input
+FR_Result2 = f9 // product computed under status field s2
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // fnorm of x
+FR_Two_N = f14 // scale factor 2**n
+FR_Two_to_Big = f15 // not referenced anywhere else in this file
+
+GR_N_Biased = r15 // n + 0xFFFF
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18
+GR_Scratch1 = r19
+GR_Bias = r20 // 0xFFFF
+GR_N_as_int = r21 // n sign-extended from 32 bits
+
+GR_SAVE_B0 = r32
+GR_SAVE_GP = r33 // r33 also carries n on entry; n is copied to r21 before this use
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35 // r35-r38 are the four output registers of the alloc frame
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.align 32
+.global ldexpf
+
+.section .text
+.proc ldexpf
+.align 32
+// ldexpf(x, n): x arrives in f8 and n in r33; x * 2**n is returned in f8.
+ldexpf:
+// Out-of-range results branch to the ldexpf_OVERFLOW / ldexpf_UNDERFLOW labels.
+//
+// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
+//
+{ .mfi
+ alloc r32=ar.pfs,1,2,4,0 // r32 = previous ar.pfs; frame r32-r34 plus outputs r35-r38
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Bias = 0x0FFFF,r0
+}
+
+//
+// Sign extend input
+// Is N zero?
+// Normalize x
+//
+{ .mfi
+ cmp.eq.unc p6,p0 = r33,r0 // p6: n == 0 (compares all 64 bits of r33)
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ sxt4 GR_N_as_int = r33 // n is a 32-bit int; widen it to 64 bits
+}
+;;
+
+//
+// Compute the biased exponent N + 0xFFFF
+// Branch and return special values.
+// Create -35000
+// Create 35000
+//
+{ .mfi
+ addl GR_Big = 35000,r0
+ nop.f 0
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{ .mfb
+ addl GR_NBig = -35000,r0
+(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 // special x: return x*1+0 in single precision
+(p7) br.ret.spnt b0
+};;
+
+//
+// Build 2**N from the biased exponent
+// Return x when N = 0
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+ addl GR_Scratch1 = 0x063BF,r0 // 0x063BF - 0xFFFF = -40000
+}
+{ .mfb
+ addl GR_Scratch = 0x019C3F,r0 // 0x19C3F - 0xFFFF = +40000
+(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
+(p6) br.ret.spnt b0
+};;
+
+//
+// Create 2**big (exponent +40000, into FR_Big)
+// Create 2**-big (exponent -40000, into FR_NBig)
+// Is N >= 35000
+// Is N <= -35000
+// Raise Denormal operand flag with compare
+// (2**N itself was already built in the previous group)
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch1
+ nop.f 0
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+}
+{ .mfi
+ setf.exp FR_Big = GR_Scratch
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 // p11 is not read anywhere in this file
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 // n >= 35000: scale by 2**40000 instead
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x000000000003007F // sign bit 0x20000 + exponent 0x1007F: -2**128
+};;
+
+
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 // n <= -35000: scale by 2**-40000 instead
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x000000000001007F // exponent 0x1007F - 0xFFFF = 128: +2**128
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41 // 0x41 = FZ + TD
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42 // 0x42 = WRE + TD
+ nop.i 999
+};;
+
+//
+// Do final operation
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch // FR_NBig becomes the negative overflow bound
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 // value returned on the main path
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 // same product under s3, tested for zero below
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1 // FR_Big becomes the positive overflow bound
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 // same product under s2, compared with the bounds below
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40 // s0 controls with only TD added
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // s0 controls with only TD added
+ nop.i 999
+};;
+
+//
+// Is the result zero?
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007 // p6: the s3 (flush-to-zero) result is +-0
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 148, r0 // tag passed on if an overflow branch is taken
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big // p7: result >= +2**128, p8: otherwise
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // keep p6 only if the s0 and s2 results differ
+ nop.i 999
+};;
+
+//
+// Is the result at or below the negative overflow bound?
+// Branch out for underflow
+//
+{ .mfb
+(p6) addl GR_Tag = 149, r0 // tag passed on for the underflow branch
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9: result <= -2**128
+(p6) br.cond.spnt L(ldexpf_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(ldexpf_OVERFLOW)
+(p9) br.cond.spnt L(ldexpf_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+.endp ldexpf
+ASM_SIZE_DIRECTIVE(ldexpf)
+.proc __libm_error_region
+__libm_error_region:
+// Shared overflow/underflow exit of ldexpf; GR_Tag was set before the branch here.
+L(ldexpf_OVERFLOW):
+L(ldexpf_UNDERFLOW):
+
+//
+// Get stack address of N (old sp-32, which is new sp+32 after the adjust below)
+//
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp (allocate a 64-byte frame)
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ st8 [GR_Parameter_Y] = GR_N_as_int,16 // N at sp+32; pointer then advances to sp+48
+ add GR_Parameter_X = 16,sp // x goes to sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack.
+//
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_Norm_X // the normalized x is what gets passed
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // result slot at sp+48
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_Result // store reads the old pointer value (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the address of N (sp+32)
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack (recomputed after the call)
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfs FR_Result = [GR_Parameter_RESULT] // reload the slot that was stored before the call
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexpl.S b/sysdeps/ia64/fpu/s_ldexpl.S
new file mode 100644
index 0000000..d9983a5
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ldexpl.S
@@ -0,0 +1,366 @@
+//.file "ldexpl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 ldexpl completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double-extended = ldexpl (double-extended x, int n)
+// input floating point f8 and int n (r34)
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Big = f6 // first 2**40000 (clamp for 2**n), later the +2**16384 overflow bound
+FR_NBig = f7 // first 2**-40000 (clamp for 2**n), later the -2**16384 overflow bound
+FR_Floating_X = f8 // input x
+FR_Result = f8 // result, same register as the input
+FR_Result2 = f9 // product computed under status field s2
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // fnorm of x
+FR_Two_N = f14 // scale factor 2**n
+FR_Two_to_Big = f15 // not referenced anywhere else in this file
+
+GR_N_Biased = r15 // n + 0xFFFF
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18
+GR_Scratch1 = r19
+GR_Bias = r20 // 0xFFFF
+GR_N_as_int = r21 // n sign-extended from 32 bits
+
+GR_SAVE_B0 = r32
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34 // r34 also carries n on entry; n is copied to r21 before this use
+GR_Parameter_X = r35 // r35-r38 are the four output registers of the alloc frame
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.align 32
+.global ldexpl
+
+.section .text
+.proc ldexpl
+.align 32
+// ldexpl(x, n): x arrives in f8 and n in r34; x * 2**n is returned in f8.
+ldexpl:
+// Out-of-range results branch to the ldexpl_OVERFLOW / ldexpl_UNDERFLOW labels.
+//
+// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
+//
+{ .mfi
+ alloc r32=ar.pfs,2,1,4,0 // r32 = previous ar.pfs; frame r32-r34 plus outputs r35-r38
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Bias = 0x0FFFF,r0
+}
+
+//
+// Sign extend input
+// Is N zero?
+// Normalize x
+//
+{ .mfi
+ cmp.eq.unc p6,p0 = r34,r0 // p6: n == 0 (compares all 64 bits of r34)
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ sxt4 GR_N_as_int = r34 // n is a 32-bit int; widen it to 64 bits
+}
+;;
+
+//
+// Compute the biased exponent N + 0xFFFF
+// Branch and return special values.
+// Create -35000
+// Create 35000
+//
+{ .mfi
+ addl GR_Big = 35000,r0
+ nop.f 0
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{ .mfb
+ addl GR_NBig = -35000,r0
+(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0 // special x: return x*1+0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Build 2**N from the biased exponent
+// Return x when N = 0
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+ addl GR_Scratch1 = 0x063BF,r0 // 0x063BF - 0xFFFF = -40000
+}
+{ .mfb
+ addl GR_Scratch = 0x019C3F,r0 // 0x19C3F - 0xFFFF = +40000
+(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
+(p6) br.ret.spnt b0
+};;
+
+//
+// Create 2**big (exponent +40000, into FR_Big)
+// Create 2**-big (exponent -40000, into FR_NBig)
+// Is N >= 35000
+// Is N <= -35000
+// Raise Denormal operand flag with compare
+// (2**N itself was already built in the previous group)
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch1
+ nop.f 0
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+}
+{ .mfi
+ setf.exp FR_Big = GR_Scratch
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 // p11 is not read anywhere in this file
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 // n >= 35000: scale by 2**40000 instead
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x0000000000033FFF // sign bit 0x20000 + exponent 0x13FFF: -2**16384
+};;
+
+
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 // n <= -35000: scale by 2**-40000 instead
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x0000000000013FFF // exponent 0x13FFF - 0xFFFF = 16384: +2**16384
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41 // 0x41 = FZ + TD
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42 // 0x42 = WRE + TD
+ nop.i 999
+};;
+
+//
+// Do final operation
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch // FR_NBig becomes the negative overflow bound
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 // value returned on the main path
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 // same product under s3, tested for zero below
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1 // FR_Big becomes the positive overflow bound
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 // same product under s2, compared with the bounds below
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40 // s0 controls with only TD added
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // s0 controls with only TD added
+ nop.i 999
+};;
+
+//
+// Is the result zero?
+//
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007 // p6: the s3 (flush-to-zero) result is +-0
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 144, r0 // tag passed on if an overflow branch is taken
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big // p7: result >= +2**16384, p8: otherwise
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // keep p6 only if the s0 and s2 results differ
+ nop.i 999
+};;
+
+//
+// Is the result at or below the negative overflow bound?
+// Branch out for underflow
+//
+{ .mfb
+(p6) addl GR_Tag = 145, r0 // tag passed on for the underflow branch
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9: result <= -2**16384
+(p6) br.cond.spnt L(ldexpl_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(ldexpl_OVERFLOW)
+(p9) br.cond.spnt L(ldexpl_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+.endp ldexpl
+ASM_SIZE_DIRECTIVE(ldexpl)
+.proc __libm_error_region
+__libm_error_region:
+// Shared overflow/underflow exit of ldexpl; GR_Tag was set before the branch here.
+L(ldexpl_OVERFLOW):
+L(ldexpl_UNDERFLOW):
+
+//
+// Get stack address of N (old sp-32, which is new sp+32 after the adjust below)
+//
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp (allocate a 64-byte frame)
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ st8 [GR_Parameter_Y] = GR_N_as_int,16 // N at sp+32; pointer then advances to sp+48
+ add GR_Parameter_X = 16,sp // x goes to sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack.
+//
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_Norm_X // the normalized x is what gets passed
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // result slot at sp+48
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_Result // store reads the old pointer value (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the address of N (sp+32)
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack (recomputed after the call)
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfe FR_Result = [GR_Parameter_RESULT] // reload the slot that was stored before the call
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_log1p.S b/sysdeps/ia64/fpu/s_log1p.S
new file mode 100644
index 0000000..a49a183
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_log1p.S
@@ -0,0 +1,1614 @@
+.file "log1p.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: log1p(x) = ln(x+1), for double precision x values
+//
+// *********************************************************************
+//
+// Accuracy: Very accurate for double precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9,f33-f55,f99
+//
+// General Purpose Registers:
+// r32-r53
+// r54-r57 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions cannot occur
+// Underflow exceptions raised when appropriate for log1p
+// (Error Handling Routine called for underflow)
+// Inexact raised when appropriate by algorithm
+//
+// log1p(inf) = inf
+// log1p(-inf) = QNaN
+// log1p(+/-0) = +/-0
+// log1p(-1) = -inf
+// log1p(SNaN) = QNaN
+// log1p(QNaN) = QNaN
+// log1p(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Computation is based on the following kernel.
+//
+// ker_log_64( in_FR : X,
+// in_FR : E,
+// in_FR : Em1,
+// in_GR : Expo_Range,
+// out_FR : Y_hi,
+// out_FR : Y_lo,
+// out_FR : Scale,
+// out_PR : Safe )
+//
+// Overview
+//
+// The method consists of three cases.
+//
+// If |X+Em1| < 2^(-80) use case log1p_small;
+// elseif |X+Em1| < 2^(-7) use case log_near1;
+// else use case log_regular;
+//
+// Case log1p_small:
+//
+// log( 1 + (X+Em1) ) can be approximated by (X+Em1).
+//
+// Case log_near1:
+//
+// log( 1 + (X+Em1) ) can be approximated by a simple polynomial
+// in W = X+Em1. This polynomial resembles the truncated Taylor
+// series W - W^2/2 + W^3/3 - ...
+//
+// Case log_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute log(Arg) for an argument Arg in [1,2), we
+// construct a value G such that G*Arg is close to 1 and that
+// log(1/G) is obtainable easily from a table of values calculated
+// beforehand. Thus
+//
+// log(Arg) = log(1/G) + log(G*Arg)
+// = log(1/G) + log(1 + (G*Arg - 1))
+//
+// Because |G*Arg - 1| is small, the second term on the right hand
+// side can be approximated by a short polynomial. We elaborate
+// this method in four steps.
+//
+// Step 0: Initialization
+//
+// We need to calculate log( E + X ). Obtain N, S_hi, S_lo such that
+//
+// E + X = 2^N * ( S_hi + S_lo ) exactly
+//
+// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+// that |S_lo| <= ulp(S_hi).
+//
+// Step 1: Argument Reduction
+//
+// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
+//
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1) + G * S_lo
+//
+// These G_j's have the property that the product is exactly
+// representable and that |r| < 2^(-12) as a result.
+//
+// Step 2: Approximation
+//
+//
+// log(1 + r) is approximated by a short polynomial poly(r).
+//
+// Step 3: Reconstruction
+//
+//
+// Finally, log( E + X ) is given by
+//
+// log( E + X ) = log( 2^N * (S_hi + S_lo) )
+// ~=~ N*log(2) + log(1/G) + log(1 + r)
+// ~=~ N*log(2) + log(1/G) + poly(r).
+//
+// **** Algorithm ****
+//
+// Case log1p_small:
+//
+// Although log(1 + (X+Em1)) is basically X+Em1, we would like to
+// preserve the inexactness nature as well as consistent behavior
+// under different rounding modes. Note that this case can only be
+// taken if E is set to be 1.0. In this case, Em1 is zero, and that
+// X can be very tiny and thus the final result can possibly underflow.
+// Thus, we compare X against a threshold that is dependent on the
+// input Expo_Range. If |X| is smaller than this threshold, we set
+// SAFE to be FALSE.
+//
+// The result is returned as Y_hi, Y_lo, and in the case of SAFE
+// is FALSE, an additional value Scale is also returned.
+//
+// W := X + Em1
+// Threshold := Threshold_Table( Expo_Range )
+// Tiny := Tiny_Table( Expo_Range )
+//
+// If ( |W| > Threshold ) then
+// Y_hi := W
+// Y_lo := -W*W
+// Else
+// Y_hi := W
+// Y_lo := -Tiny
+// Scale := 2^(-100)
+// Safe := FALSE
+// EndIf
+//
+//
+// One may think that Y_lo should be -W*W/2; however, it does not matter
+// as Y_lo will be rounded off completely except for the correct effect in
+// directed rounding. Clearly -W*W is simpler to compute. Moreover,
+// because of the difference in exponent value, Y_hi + Y_lo or
+// Y_hi + Scale*Y_lo is always inexact.
+//
+// Case log_near1:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into two portions.
+//
+// W := X + Em1
+// Wsq := W * W
+// W4 := Wsq*Wsq
+// W6 := W4*Wsq
+// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+// set lsb(Y_lo) to be 1
+//
+// Case log_regular:
+//
+// We present the algorithm in four steps.
+//
+// Step 0. Initialization
+// ----------------------
+//
+// Z := X + E
+// N := unbiased exponent of Z
+// S_hi := 2^(-N) * Z
+// S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
+//
+// Note that S_lo is always 0 for the case E = 0.
+//
+// Step 1. Argument Reduction
+// --------------------------
+//
+// Let
+//
+// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//
+// We obtain G_1, G_2, G_3 by the following steps.
+//
+//
+// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
+// from S_hi.
+//
+// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+// to lsb = 2^(-4).
+//
+// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+//
+// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
+// fixed point lsb = 2^(-15).
+// Z_1 looks like z_0.z_1 z_2 ... z_15
+// Note that the fetching is done using index_1.
+// A_1 is actually not needed in the implementation
+// and is used here only to explain how is the value
+// Z_1 defined.
+//
+// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
+// floating pt. Again, fetching is done using index_1. A_1
+// explains how G_1 is defined.
+//
+// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 d_5 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_1 indeed always begins
+// with 1.0000 in fixed point.
+//
+//
+// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
+// truncated to lsb = 2^(-8). Similar to A_1,
+// A_2 is not needed in actual implementation. It
+// helps explain how some of the values are defined.
+//
+// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+//
+// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
+// fixed point lsb = 2^(-15). Fetch done using index_2.
+// Z_2 looks like z_0.z_1 z_2 ... z_15
+//
+// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
+// floating pt.
+//
+// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_2 indeed always begins
+// with 1.00000000 in fixed point.
+//
+//
+// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//
+// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//
+// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
+// floating pt. Fetch is done using index_3.
+//
+// Compute G := G_1 * G_2 * G_3.
+//
+// This is done exactly since each of G_j only has 21 sig. bits.
+//
+// Compute
+//
+// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+//
+// thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of
+// rounding errors.
+//
+//
+// Step 2. Approximation
+// ---------------------
+//
+// This step computes an approximation to log( 1 + r ) where r is the
+// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
+// thus log(1+r) can be approximated by a short polynomial:
+//
+// log(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//
+//
+// Step 3. Reconstruction
+// ----------------------
+//
+// This step computes the desired result of log(X+E):
+//
+// log(X+E) = log( 2^N * (S_hi + S_lo) )
+// = N*log(2) + log( S_hi + S_lo )
+// = N*log(2) + log(1/G) +
+// log(1 + G*(S_hi+S_lo) - 1 )
+//
+// log(2), log(1/G_j) are stored as pairs of (single,double) numbers:
+// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
+// single-precision numbers and the low parts are double precision
+// numbers. These have the property that
+//
+// N*log2_hi + SUM ( log1byGj_hi )
+//
+// is computable exactly in double-extended precision (64 sig. bits).
+// Finally
+//
+// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+// Y_lo := poly_hi + [ poly_lo +
+// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+// set lsb(Y_lo) to be 1
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 (polynomial of case log_near1)
+// Row layout: significand low word, significand high word, sign/exponent, zero pad.
+.align 64
+Constants_P:
+ASM_TYPE_DIRECTIVE(Constants_P,@object)
+data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000 // P_8, close to +1/9
+data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000 // P_7, close to -1/8
+data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000 // P_6, close to +1/7
+data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000 // P_5, close to -1/6
+data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000 // P_4, close to +1/5
+data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000 // P_3, close to -1/4
+data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000 // P_2, close to +1/3
+data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000 // P_1, close to -1/2
+ASM_SIZE_DIRECTIVE(Constants_P)
+
+// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
+// Six 16-byte entries, read in exactly this order with ldfe on the main path.
+.align 64
+Constants_Q:
+ASM_TYPE_DIRECTIVE(Constants_Q,@object)
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
+data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
+data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
+data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Q)
+
+// Z1 - 16 bit fixed, G1 and H1 - IEEE single, h1 - IEEE double
+// 16 rows of 24 bytes (GR_Table_Scale = 0x18): Z1, G1, H1, one unused word, then h1.
+.align 64
+Constants_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
+data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
+data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
+data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
+data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
+data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
+data4 0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
+
+// Z2 - 16 bit fixed, G2 and H2 - IEEE single, h2 - IEEE double
+// 16 rows of 24 bytes (GR_Table_Scale = 0x18): Z2, G2, H2, one unused word, then h2.
+.align 64
+Constants_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
+data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
+data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
+
+// G3 and H3 - IEEE single and h3 - IEEE double
+// 32 rows of 16 bytes (G3, H3, h3), addressed as table base + 16*Index3.
+.align 64
+Constants_Z_G_H_h3:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
+data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
+data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
+data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
+data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
+data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
+data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
+data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
+data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
+data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
+data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
+data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
+data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
+data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
+data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
+data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
+data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
+data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
+data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
+data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
+data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
+data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
+data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
+data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
+data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
+data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
+data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
+data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
+data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
+data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
+data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
+data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
+data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+
+//
+// Exponent Thresholds and Tiny Thresholds
+// for 8, 11, 15, and 17 bit exponents
+// Threshold_Table (stored interleaved with Tiny_Table: Threshold, Tiny, ...)
+// Expo_Range Value
+//
+// 0 (8 bits) 2^(-126)
+// 1 (11 bits) 2^(-1022)
+// 2 (15 bits) 2^(-16382)
+// 3 (17 bits) 2^(-16382)
+//
+// Tiny_Table
+// ----------
+// Expo_Range Value
+//
+// 0 (8 bits) 2^(-16382)
+// 1 (11 bits) 2^(-16382)
+// 2 (15 bits) 2^(-16382)
+// 3 (17 bits) 2^(-16382)
+//
+// log1p_small reads the pair at base + 16*GR_Expo_Range, with GR_Expo_Range = 2.
+.align 64
+Constants_Threshold:
+ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
+data4 0x00000000,0x80000000,0x00003F81,0x00000000
+data4 0x00000000,0x80000000,0x00000001,0x00000000
+data4 0x00000000,0x80000000,0x00003C01,0x00000000
+data4 0x00000000,0x80000000,0x00000001,0x00000000
+data4 0x00000000,0x80000000,0x00000001,0x00000000
+data4 0x00000000,0x80000000,0x00000001,0x00000000
+data4 0x00000000,0x80000000,0x00000001,0x00000000
+data4 0x00000000,0x80000000,0x00000001,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Threshold)
+// 1/ln(10) as a (hi, lo) pair; only loaded in LOG_main under p14, which log1p sets false.
+.align 64
+Constants_1_by_LN10:
+ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
+data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
+data4 0xACCF70C8,0xD56EAABE,0x00003FBD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
+// Floating-point register aliases; names marked Shared reuse the same register.
+FR_Input_X = f8
+FR_Neg_One = f9
+FR_E = f33
+FR_Em1 = f34
+FR_Y_hi = f34
+// Shared with Em1 (log1p_small copies W into FR_Em1 and then reads it as FR_Y_hi)
+FR_Y_lo = f35
+FR_Scale = f36
+FR_X_Prime = f37
+FR_Z = f38
+FR_S_hi = f38
+// Shared with Z
+FR_W = f39
+FR_G = f40
+FR_wsq = f40
+// Shared with G
+FR_H = f41
+FR_w4 = f41
+// Shared with H
+FR_h = f42
+FR_w6 = f42
+// Shared with h
+FR_G_tmp = f43
+FR_poly_lo = f43
+// Shared with G_tmp
+FR_P8 = f43
+// Shared with G_tmp
+FR_H_tmp = f44
+FR_poly_hi = f44
+// Shared with H_tmp
+FR_P7 = f44
+// Shared with H_tmp
+FR_h_tmp = f45
+FR_rsq = f45
+// Shared with h_tmp
+FR_P6 = f45
+// Shared with h_tmp
+FR_abs_W = f46
+FR_r = f46
+// Shared with abs_W
+FR_AA = f47
+FR_log2_hi = f47
+// Shared with AA
+FR_BB = f48
+FR_log2_lo = f48
+// Shared with BB
+FR_S_lo = f49
+FR_two_negN = f50
+FR_float_N = f51
+FR_Q4 = f52
+FR_dummy = f52
+// Shared with Q4
+FR_P4 = f52
+// Shared with Q4
+FR_Threshold = f52
+// Shared with Q4
+FR_Q3 = f53
+FR_P3 = f53
+// Shared with Q3
+FR_Tiny = f53
+// Shared with Q3
+FR_Q2 = f54
+FR_P2 = f54
+// Shared with Q2
+FR_1LN10_hi = f54
+// Shared with Q2
+FR_Q1 = f55
+FR_P1 = f55
+// Shared with Q1
+FR_1LN10_lo = f55
+// Shared with Q1
+FR_P5 = f98
+FR_SCALE = f98 // Shared with P5
+FR_Output_X_tmp = f99
+// General register aliases: r32-r53 are locals, r54-r57 outputs (alloc 0,22,4,0 in log1p).
+GR_Expo_Range = r32
+GR_Table_Base = r34
+GR_Table_Base1 = r35
+GR_Table_ptr = r36
+GR_Index2 = r37
+GR_signif = r38
+GR_X_0 = r39
+GR_X_1 = r40
+GR_X_2 = r41
+GR_Z_1 = r42
+GR_Z_2 = r43
+GR_N = r44
+GR_Bias = r45
+GR_M = r46
+GR_ScaleN = r47
+GR_Index3 = r48
+GR_Perturb = r49
+GR_Table_Scale = r50
+
+// Values saved around the __libm_error_support call in __libm_error_region.
+GR_SAVE_PFS = r51
+GR_SAVE_B0 = r52
+GR_SAVE_GP = r53
+// Argument registers set up for __libm_error_support in __libm_error_region.
+GR_Parameter_X = r54
+GR_Parameter_Y = r55
+GR_Parameter_RESULT = r56
+
+GR_Parameter_TAG = r57
+
+
+.section .text
+.proc log1p#
+.global log1p#
+.align 64
+log1p:
+#ifdef _LIBC
+.global __log1p
+__log1p:
+#endif
+// Entry: build -1, normalize x, and set the mode predicates (p7 and p15 true, p14 false).
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0
+(p0) fsub.s1 FR_Neg_One = f0,f1
+(p0) cmp.eq.unc p7, p0 = r0, r0
+}
+
+{ .mfi
+(p0) cmp.ne.unc p14, p0 = r0, r0
+(p0) fnorm.s1 FR_X_Prime = FR_Input_X
+(p0) cmp.eq.unc p15, p0 = r0, r0 ;;
+}
+// p6: NatVal, NaN or Inf input; later branches to L(LOG_64_special).
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
+ nop.i 999
+}
+;;
+// p10: unsupported encoding; later branches to L(LOG_64_unsupported).
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
+ nop.i 999
+}
+;;
+// p9: x == 0; later branches to L(LOG_64_one).
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0
+ nop.i 999
+}
+// Em1 = 0
+{ .mfi
+ nop.m 999
+(p0) fadd FR_Em1 = f0,f0
+ nop.i 999 ;;
+}
+// E = 1
+{ .mfi
+ nop.m 999
+(p0) fadd FR_E = f0,f1
+ nop.i 999 ;;
+}
+// p8: x == -1; later branches to L(LOG_64_zero).
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One
+ nop.i 999
+}
+// p13: x < -1; later branches to L(LOG_64_negative).
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One
+ nop.i 999
+}
+
+
+L(LOG_BEGIN):
+// Shared kernel start: form Z = x + E and W = x + Em1, then dispatch the special cases.
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_Table_Scale = 0x0000000000000018 ;;
+}
+
+{ .mmi
+ nop.m 999
+//
+// Create E = 1 and Em1 = 0
+// Check for X == 0, meaning log(1+0)
+// Check for X < -1, meaning log(negative)
+// Check for X == -1, meaning log(0)
+// Normalize x
+// Identify NatVals, NaNs, Infs.
+// Identify EM unsupporteds.
+// Identify Negative values - use S1 so as
+// not to raise denormal operand exception
+// Set p15 to true for log1p
+// Set p14 to false for log1p
+// Set p7 true for log and log1p
+//
+(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ld8 GR_Table_Base = [GR_Table_Base]
+(p0) fmin.s1 FR_BB = FR_X_Prime, FR_E
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1
+//
+// Begin load of constants base
+// FR_Z = Z = x + E (x is the normalized input FR_X_Prime)
+// FR_W = W = x + Em1
+// AA = fmax(x,E)
+// BB = fmin(x,E)
+//
+(p6) br.cond.spnt L(LOG_64_special) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt L(LOG_64_unsupported) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.spnt L(LOG_64_negative) ;;
+}
+
+{ .mib
+(p0) getf.sig GR_signif = FR_Z
+ nop.i 999
+(p9) br.cond.spnt L(LOG_64_one) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(LOG_64_zero) ;;
+}
+
+{ .mfi
+(p0) getf.exp GR_N = FR_Z
+//
+// Raise possible denormal operand exception
+// Create Bias
+//
+// This function computes ln( x + e )
+// Input FR 1: FR_X = FR_Input_X
+// Input FR 2: FR_E = FR_E
+// Input FR 3: FR_Em1 = FR_Em1
+// Input GR 1: GR_Expo_Range = GR_Expo_Range = 1
+// Output FR 4: FR_Y_hi
+// Output FR 5: FR_Y_lo
+// Output FR 6: FR_Scale
+// Output PR 7: PR_Safe
+//
+(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z
+//
+// signif = getf.sig(Z)
+// abs_W = fabs(w)
+//
+(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.se FR_S_hi = f1,FR_Z
+(p0) extr.u GR_X_0 = GR_signif, 49, 15
+}
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp
+ nop.i 999
+}
+;;
+
+{ .mlx
+ ld8 GR_Table_Base1 = [GR_Table_Base1]
+(p0) movl GR_Bias = 0x000000000000FFFF ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fabs FR_abs_W = FR_W
+(p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0
+}
+
+{ .mfi
+ nop.m 999
+//
+// Branch out for special input values
+//
+(p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// X_0 = extr.u(signif,49,15)
+// Index1 = extr.u(signif,59,4)
+//
+(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Offset_to_Z1 = 24 * Index1
+// For performance, don't use result
+// for 3 or 4 cycles.
+//
+(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;;
+}
+//
+// Add Base to Offset for Z1
+// Create Bias
+
+{ .mmi
+(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;;
+(p0) ldfs FR_G = [GR_Table_ptr],4
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfs FR_H = [GR_Table_ptr],8 ;;
+(p0) ldfd FR_h = [GR_Table_ptr],0
+(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
+}
+//
+// Load Z_1
+// Get Base of Table2
+//
+
+{ .mfi
+(p0) getf.exp GR_M = FR_abs_W
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// M = getf.exp(abs_W)
+// S_lo = AA - Z
+// X_1 = pmpyshr2(X_0,Z_1,15)
+//
+(p0) sub GR_M = GR_M, GR_Bias ;;
+}
+//
+// M = M - Bias
+// Load G1
+// N = getf.exp(Z)
+//
+
+{ .mii
+(p0) cmp.gt.unc p11, p0 = -80, GR_M
+(p0) cmp.gt.unc p12, p0 = -7, GR_M ;;
+(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
+}
+// -80 > M (p11) goes to log1p_small; -7 > M (p12) goes to log1p_near.
+{ .mib
+ nop.m 999
+//
+// if -80 > M, set p11
+// Index2 = extr.u(X_1,6,4)
+// if -7 > M, set p12
+// Load H1
+//
+(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0
+(p11) br.cond.spnt L(log1p_small) ;;
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p12) br.cond.spnt L(log1p_near) ;;
+}
+// Main path: table-driven reduction r = G*S_hi - 1 (+ G*S_lo), then the Q polynomial.
+{ .mii
+(p0) sub GR_N = GR_N, GR_Bias
+//
+// poly_lo = r * poly_lo
+//
+(p0) add GR_Perturb = 0x1, r0 ;;
+(p0) sub GR_ScaleN = GR_Bias, GR_N
+}
+
+{ .mii
+(p0) setf.sig FR_float_N = GR_N
+ nop.i 999 ;;
+//
+// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
+// Load h1
+// S_lo = S_lo + BB
+// Branch for -80 > M
+//
+(p0) add GR_Index2 = GR_Index2, GR_Table_Base1
+}
+
+{ .mmi
+(p0) setf.exp FR_two_negN = GR_ScaleN
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp
+};;
+
+//
+// Index2 points to Z2
+// Branch for -7 > M
+//
+
+{ .mmb
+(p0) ld4 GR_Z_2 = [GR_Index2],4
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.b 999 ;;
+}
+(p0) nop.i 999
+//
+// Load Z_2
+// N = N - Bias
+// Tablebase points to Table3
+//
+
+{ .mmi
+(p0) ldfs FR_G_tmp = [GR_Index2],4 ;;
+//
+// Load G_2
+// pmpyshr2 X_2= (X_1,Z_2,15)
+// float_N = setf.sig(N)
+// ScaleN = Bias - N
+//
+(p0) ldfs FR_H_tmp = [GR_Index2],8
+ nop.i 999 ;;
+}
+//
+// Load H_2
+// two_negN = setf.exp(scaleN)
+// G = G_1 * G_2
+//
+
+{ .mfi
+(p0) ldfd FR_h_tmp = [GR_Index2],0
+ nop.f 999
+(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
+}
+
+{ .mii
+ nop.m 999
+(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
+//
+// Load h_2
+// H = H_1 + H_2
+// h = h_1 + h_2
+// Index3 = extr.u(X_2,1,5)
+//
+(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base
+}
+
+{ .mmi
+ nop.m 999
+ nop.m 999
+//
+// float_N = fcvt.xf(float_N)
+// load G3
+//
+(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;;
+}
+
+{ .mfi
+ld8 GR_Table_Base = [GR_Table_Base]
+nop.f 999
+nop.i 999
+} ;;
+
+{ .mfi
+(p0) ldfe FR_log2_hi = [GR_Table_Base],16
+(p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN
+ nop.i 999 ;;
+}
+
+{ .mmf
+ nop.m 999
+//
+// G = G3 * G
+// Load h3
+// Load log2_hi
+// H = H + H3
+//
+(p0) ldfe FR_log2_lo = [GR_Table_Base],16
+(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;;
+}
+
+{ .mmf
+(p0) ldfs FR_G_tmp = [GR_Index3],4
+//
+// h = h + h3
+// r = G * S_hi - 1
+// Load log2_lo
+//
+(p0) ldfe FR_Q4 = [GR_Table_Base],16
+(p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;;
+}
+
+{ .mfi
+(p0) ldfe FR_Q3 = [GR_Table_Base],16
+(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
+ nop.i 999 ;;
+}
+
+{ .mmf
+(p0) ldfs FR_H_tmp = [GR_Index3],4
+(p0) ldfe FR_Q2 = [GR_Table_Base],16
+//
+// Compute Index for Table3
+// S_lo = S_lo * two_negN
+//
+(p0) fcvt.xf FR_float_N = FR_float_N ;;
+}
+//
+// If S_lo == 0, set p8 false
+// Load H3
+// Load ptr to table of polynomial coeff.
+//
+
+{ .mmf
+(p0) ldfd FR_h_tmp = [GR_Index3],0
+(p0) ldfe FR_Q1 = [GR_Table_Base],0
+(p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_h = FR_h, FR_h_tmp
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load Q4
+// Load Q3
+// Load Q2
+// Load Q1
+//
+(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_lo = r * Q4 + Q3
+// rsq = r* r
+//
+(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// If (S_lo!=0) r = s_lo * G + r
+//
+(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
+ nop.i 999
+}
+//
+// Create a 0x00000....01 (FR_dummy aliases FR_Q4; its consumers are commented out below)
+// poly_lo = poly_lo * rsq + h
+//
+
+{ .mfi
+(p0) setf.sig FR_dummy = GR_Perturb
+(p0) fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// h = N * log2_lo + h
+// Y_hi = n * log2_hi + H
+//
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_lo = r * poly_lo + Q2
+// poly_hi = Q1 * rsq + r
+//
+(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo
+//
+// Create the FR for a binary "or"
+// Y_lo = poly_hi + poly_lo
+//
+// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+// Turn the lsb of Y_lo ON
+//
+// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
+//
+// Merge the new lsb into Y_lo, for alone doesn't
+//
+(p0) br.cond.sptk L(LOG_main) ;;
+}
+
+
+L(log1p_near):
+// -7 > M: Y_hi = W + W^2*(P1+P2*W+P3*W^2+P4*W^3), Y_lo = W^6*(P5+P6*W+P7*W^2+P8*W^3).
+{ .mmi
+ nop.m 999
+ nop.m 999
+// /*******************************************************/
+// /*********** Branch log1p_near ************************/
+// /*******************************************************/
+(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;;
+}
+//
+// Load base address of poly. coeff.
+//
+{.mmi
+ nop.m 999
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.i 999
+};;
+
+{ .mmb
+(p0) add GR_Table_ptr = 0x40,GR_Table_Base
+//
+// Address tables with separate pointers
+//
+(p0) ldfe FR_P8 = [GR_Table_Base],16
+ nop.b 999 ;;
+}
+
+{ .mmb
+(p0) ldfe FR_P4 = [GR_Table_ptr],16
+//
+// Load P4
+// Load P8
+//
+(p0) ldfe FR_P7 = [GR_Table_Base],16
+ nop.b 999 ;;
+}
+
+{ .mmf
+(p0) ldfe FR_P3 = [GR_Table_ptr],16
+//
+// Load P3
+// Load P7
+//
+(p0) ldfe FR_P6 = [GR_Table_Base],16
+(p0) fmpy.s1 FR_wsq = FR_W, FR_W ;;
+}
+
+{ .mfi
+(p0) ldfe FR_P2 = [GR_Table_ptr],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3
+ nop.i 999
+}
+//
+// Load P2
+// Load P6
+// Wsq = w * w
+// Y_hi = p4 * w + p3
+//
+
+{ .mfi
+(p0) ldfe FR_P5 = [GR_Table_Base],16
+(p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe FR_P1 = [GR_Table_ptr],16
+//
+// Load P1
+// Load P5
+// Y_lo = p8 * w + P7
+//
+(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6
+(p0) add GR_Perturb = 0x1, r0 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// w4 = w2 * w2
+// Y_hi = y_hi * w + p2
+// Y_lo = y_lo * w + p6
+// Create perturbation bit
+//
+(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1
+ nop.i 999
+}
+//
+// Y_hi = y_hi * w + p1
+// w6 = w4 * w2
+//
+
+{ .mfi
+(p0) setf.sig FR_Q4 = GR_Perturb
+(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// Y_hi = y_hi * wsq + w
+// Y_lo = y_lo * w + p5
+//
+(p0) fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo
+//
+// Y_lo = y_lo * w6
+//
+// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+// Set lsb on: Taken out to improve performance
+//
+// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
+//
+// Make sure it's on in Y_lo also. Taken out to improve
+// performance
+//
+(p0) br.cond.sptk L(LOG_main) ;;
+}
+
+
+L(log1p_small):
+// -80 > M: return W - W*W if |W| > Threshold, else W - Tiny*2^-100 (Y_hi aliases Em1 = W).
+{ .mmi
+ nop.m 999
+ nop.m 999
+// /*******************************************************/
+// /*********** Branch log1p_small ***********************/
+// /*******************************************************/
+(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp
+}
+
+{ .mfi
+ nop.m 999
+(p0) mov FR_Em1 = FR_W
+(p0) cmp.eq.unc p7, p0 = r0, r0 ;;
+}
+
+{ .mlx
+ ld8 GR_Table_Base = [GR_Table_Base]
+(p0) movl GR_Expo_Range = 0x0000000000000002 ;;
+}
+//
+// Set Safe to true
+// Set Expo_Range = 0 for single
+// Set Expo_Range = 2 for double
+// Set Expo_Range = 4 for double-extended
+//
+
+{ .mmi
+(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;;
+(p0) ldfe FR_Threshold = [GR_Table_Base],16
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_Bias = 0x000000000000FF9B ;;
+}
+
+{ .mfi
+(p0) ldfe FR_Tiny = [GR_Table_Base],0
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p13) fadd FR_SCALE = f0, f1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny
+(p12) cmp.ne.unc p7, p0 = r0, r0
+}
+
+{ .mfi
+(p12) setf.exp FR_SCALE = GR_Bias
+ nop.f 999
+ nop.i 999 ;;
+}
+
+//
+// Set p7 to SAFE = FALSE
+// Set Scale = 2^-100
+//
+{ .mfb
+ nop.m 999
+(p0) fma.d.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi
+(p0) br.ret.sptk b0
+}
+;;
+
+L(LOG_64_one):
+// Reached when x == 0 (p9): return x * 0 rounded to double.
+{ .mfb
+ nop.m 999
+(p0) fmpy.d.s0 FR_Input_X = FR_Input_X, f0
+(p0) br.ret.sptk b0
+}
+;;
+
+//
+// Raise divide by zero for x == -1, i.e. log(0).
+//
+L(LOG_64_zero):
+
+{ .mfi
+(p0) mov GR_Parameter_TAG = 140
+//
+// If we have log1p(-1), return -Inf.
+//
+(p0) fsub.s0 FR_Output_X_tmp = f0, f1
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
+(p0) br.cond.sptk L(LOG_ERROR_Support) ;;
+}
+
+L(LOG_64_special):
+// Reached for NatVal, NaN and Inf inputs; only -Inf continues past the p7 return below.
+{ .mfi
+ nop.m 999
+//
+// Return -Inf or value from handler.
+//
+(p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Check for Natval, QNan, SNaN, +Inf
+//
+(p7) fmpy.d.s0 f8 = FR_Input_X, f1
+//
+// For SNaN raise invalid and return QNaN.
+// For QNaN raise invalid and return QNaN.
+// For +Inf return +Inf.
+//
+(p7) br.ret.sptk b0
+}
+;;
+
+//
+// For -Inf raise invalid and return QNaN.
+//
+
+{ .mfb
+(p0) mov GR_Parameter_TAG = 141
+(p0) fmpy.d.s0 FR_Output_X_tmp = FR_Input_X, f0
+(p0) br.cond.sptk L(LOG_ERROR_Support) ;;
+}
+
+//
+// Report that log1p(-Inf) computed
+//
+
+L(LOG_64_unsupported):
+// Reached when p10 is set (input matched none of the fclass 0x1FF classes).
+//
+// Return generated NaN or other value .
+//
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.d.s0 FR_Input_X = FR_Input_X, f0
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOG_64_negative):
+// Reached when x < -1 (p13). No branch here: execution continues into L(LOG_ERROR_Support).
+{ .mfi
+ nop.m 999
+//
+// Deal with x < -1 in a special way
+//
+(p0) frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
+//
+// Deal with x < -1 in a special way - raise
+// invalid and produce QNaN indefinite.
+//
+(p0) mov GR_Parameter_TAG = 141
+}
+
+.endp log1p#
+ASM_SIZE_DIRECTIVE(log1p)
+
+.proc __libm_error_region
+__libm_error_region:
+L(LOG_ERROR_Support):
+.prologue
+// Calls __libm_error_support with the tag already in GR_Parameter_TAG, then returns the stored result.
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+// Frame after sp -= 64: x at sp+16, second argument (f0) at sp+32, result slot at sp+48.
+
+// (2)
+{ .mmi
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfd [GR_Parameter_X] =FR_Input_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_Output_X_tmp // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+// (4)
+{ .mmi
+ ldfd FR_Input_X = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.proc __libm_LOG_main
+__libm_LOG_main:
+L(LOG_main):
+
+// Final combination for the main and near paths of kernel_log_64, which computes ln(X + E):
+// under p7 the result is Y_lo + Y_hi rounded to double;
+// under p14 it is Y_hi*hi + (Y_hi*lo + Y_lo*hi) with (hi, lo) from Constants_1_by_LN10.
+
+{ .mfi
+ nop.m 999
+(p7) fadd.d.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
+ nop.i 999
+}
+
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
+}
+
+{ .mmi
+ nop.m 999
+(p14) ld8 GR_Table_Base = [GR_Table_Base]
+ nop.i 999
+};;
+
+{ .mmi
+(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;;
+(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p14) fma.d.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
+(p0) br.ret.sptk b0 ;;
+}
+.endp __libm_LOG_main
+ASM_SIZE_DIRECTIVE(__libm_LOG_main)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_log1pf.S b/sysdeps/ia64/fpu/s_log1pf.S
new file mode 100644
index 0000000..7f21cca
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_log1pf.S
@@ -0,0 +1,1616 @@
+.file "log1pf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// Function: log1pf(x) = ln(x+1), for single precision values
+//
+// *********************************************************************
+//
+// Accuracy: Very accurate for single precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9,f33-f55,f98-f99
+//
+// General Purpose Registers:
+// r32-r53
+// r54-r57 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions cannot occur
+// Underflow exceptions raised when appropriate for log1pf
+// (Error Handling Routine called for underflow)
+// Inexact raised when appropriate by algorithm
+//
+// log1pf(inf) = inf
+// log1pf(-inf) = QNaN
+// log1pf(+/-0) = +/-0
+// log1pf(-1) = -inf
+// log1pf(SNaN) = QNaN
+// log1pf(QNaN) = QNaN
+// log1pf(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Computation is based on the following kernel.
+//
+// ker_log_64( in_FR : X,
+// in_FR : E,
+// in_FR : Em1,
+// in_GR : Expo_Range,
+// out_FR : Y_hi,
+// out_FR : Y_lo,
+// out_FR : Scale,
+// out_PR : Safe )
+//
+// Overview
+//
+// The method consists of three cases.
+//
+// If |X+Em1| < 2^(-80) use case log1pf_small;
+// elseif |X+Em1| < 2^(-7) use case log_near1;
+// else use case log_regular;
+//
+// Case log1pf_small:
+//
+// log( 1 + (X+Em1) ) can be approximated by (X+Em1).
+//
+// Case log_near1:
+//
+// log( 1 + (X+Em1) ) can be approximated by a simple polynomial
+// in W = X+Em1. This polynomial resembles the truncated Taylor
+// series W - W^2/2 + W^3/3 - ...
+//
+// Case log_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute log(Arg) for an argument Arg in [1,2), we
+// construct a value G such that G*Arg is close to 1 and that
+// log(1/G) is obtainable easily from a table of values calculated
+// beforehand. Thus
+//
+// log(Arg) = log(1/G) + log(G*Arg)
+// = log(1/G) + log(1 + (G*Arg - 1))
+//
+// Because |G*Arg - 1| is small, the second term on the right hand
+// side can be approximated by a short polynomial. We elaborate
+// this method in four steps.
+//
+// Step 0: Initialization
+//
+// We need to calculate log( E + X ). Obtain N, S_hi, S_lo such that
+//
+// E + X = 2^N * ( S_hi + S_lo ) exactly
+//
+// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+// that |S_lo| <= ulp(S_hi).
+//
+// Step 1: Argument Reduction
+//
+// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
+//
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1) + G * S_lo
+//
+// These G_j's have the property that the product is exactly
+// representable and that |r| < 2^(-12) as a result.
+//
+// Step 2: Approximation
+//
+//
+// log(1 + r) is approximated by a short polynomial poly(r).
+//
+// Step 3: Reconstruction
+//
+//
+// Finally, log( E + X ) is given by
+//
+// log( E + X ) = log( 2^N * (S_hi + S_lo) )
+// ~=~ N*log(2) + log(1/G) + log(1 + r)
+// ~=~ N*log(2) + log(1/G) + poly(r).
+//
+// **** Algorithm ****
+//
+// Case log1pf_small:
+//
+// Although log(1 + (X+Em1)) is basically X+Em1, we would like to
+// preserve the inexactness nature as well as consistent behavior
+// under different rounding modes. Note that this case can only be
+// taken if E is set to be 1.0. In this case, Em1 is zero, and that
+// X can be very tiny and thus the final result can possibly underflow.
+// Thus, we compare X against a threshold that is dependent on the
+// input Expo_Range. If |X| is smaller than this threshold, we set
+// SAFE to be FALSE.
+//
+// The result is returned as Y_hi, Y_lo, and, when SAFE is FALSE,
+// an additional value Scale is also returned.
+//
+// W := X + Em1
+// Threshold := Threshold_Table( Expo_Range )
+// Tiny := Tiny_Table( Expo_Range )
+//
+// If ( |W| > Threshold ) then
+// Y_hi := W
+// Y_lo := -W*W
+// Else
+// Y_hi := W
+// Y_lo := -Tiny
+// Scale := 2^(-100)
+// Safe := FALSE
+// EndIf
+//
+//
+// One may think that Y_lo should be -W*W/2; however, it does not matter
+// as Y_lo will be rounded off completely except for the correct effect in
+// directed rounding. Clearly -W*W is simpler to compute. Moreover,
+// because of the difference in exponent value, Y_hi + Y_lo or
+// Y_hi + Scale*Y_lo is always inexact.
+//
+// Case log_near1:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into two portions.
+//
+// W := X + Em1
+// Wsq := W * W
+// W4 := Wsq*Wsq
+// W6 := W4*Wsq
+// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+// set lsb(Y_lo) to be 1
+//
+// Case log_regular:
+//
+// We present the algorithm in four steps.
+//
+// Step 0. Initialization
+// ----------------------
+//
+// Z := X + E
+// N := unbiased exponent of Z
+// S_hi := 2^(-N) * Z
+// S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
+//
+// Note that S_lo is always 0 for the case E = 0.
+//
+// Step 1. Argument Reduction
+// --------------------------
+//
+// Let
+//
+// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//
+// We obtain G_1, G_2, G_3 by the following steps.
+//
+//
+// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
+// from S_hi.
+//
+// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+// to lsb = 2^(-4).
+//
+// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+//
+// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
+// fixed point lsb = 2^(-15).
+// Z_1 looks like z_0.z_1 z_2 ... z_15
+// Note that the fetching is done using index_1.
+// A_1 is actually not needed in the implementation
+// and is used here only to explain how the value
+// Z_1 is defined.
+//
+// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
+// floating pt. Again, fetching is done using index_1. A_1
+// explains how G_1 is defined.
+//
+// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 d_5 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_1 indeed always begins
+// with 1.0000 in fixed point.
+//
+//
+// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
+// truncated to lsb = 2^(-8). Similar to A_1,
+// A_2 is not needed in actual implementation. It
+// helps explain how some of the values are defined.
+//
+// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+//
+// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
+// fixed point lsb = 2^(-15). Fetch done using index_2.
+// Z_2 looks like z_0.z_1 z_2 ... z_15
+//
+// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
+// floating pt.
+//
+// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_2 indeed always begins
+// with 1.00000000 in fixed point.
+//
+//
+// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//
+// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//
+// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
+// floating pt. Fetch is done using index_3.
+//
+// Compute G := G_1 * G_2 * G_3.
+//
+// This is done exactly since each of G_j only has 21 sig. bits.
+//
+// Compute
+//
+// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+//
+// thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of
+// rounding errors.
+//
+//
+// Step 2. Approximation
+// ---------------------
+//
+// This step computes an approximation to log( 1 + r ) where r is the
+// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
+// thus log(1+r) can be approximated by a short polynomial:
+//
+// log(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//
+//
+// Step 3. Reconstruction
+// ----------------------
+//
+// This step computes the desired result of log(X+E):
+//
+// log(X+E) = log( 2^N * (S_hi + S_lo) )
+// = N*log(2) + log( S_hi + S_lo )
+// = N*log(2) + log(1/G) +
+// log(1 + G*(S_hi+S_lo) - 1 )
+//
+// log(2), log(1/G_j) are stored as pairs of (single,double) numbers:
+// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
+// single-precision numbers and the low parts are double precision
+// numbers. These have the property that
+//
+// N*log2_hi + SUM ( log1byGj_hi )
+//
+// is computable exactly in double-extended precision (64 sig. bits).
+// Finally
+//
+// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+// Y_lo := poly_hi + [ poly_lo +
+// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+// set lsb(Y_lo) to be 1
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
+
+.align 64
+Constants_P:
+ASM_TYPE_DIRECTIVE(Constants_P,@object)
+data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000 // P_8, approx +1/9 (offset 0x00)
+data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000 // P_7, approx -1/8
+data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000 // P_6, approx +1/7
+data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000 // P_5, approx -1/6
+data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000 // P_4, approx +1/5 (offset 0x40)
+data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000 // P_3, approx -1/4
+data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000 // P_2, approx +1/3
+data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000 // P_1, approx -1/2
+ASM_SIZE_DIRECTIVE(Constants_P)
+
+// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
+
+.align 64
+Constants_Q:
+ASM_TYPE_DIRECTIVE(Constants_Q,@object)
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 // log2_hi, approx 0.693147 (low significand word is zero)
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 // log2_lo, small negative correction
+data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 // Q_4, approx +1/5
+data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 // Q_3, approx -1/4
+data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 // Q_2, approx +1/3
+data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 // Q_1, approx -1/2
+ASM_SIZE_DIRECTIVE(Constants_Q)
+
+// Z1 - 16 bit fixed, G1 and H1 - IEEE single, h1 - IEEE double
+
+.align 64
+Constants_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
+data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000 // 24-byte row: Z_1, G_1, H_1, pad, h_1 (two words); index_1 = 0
+data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6 // index_1 = 1
+data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
+data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
+data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
+data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
+data4 0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874 // index_1 = 15
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
+
+// Z2 - 16 bit fixed, G2 and H2 - IEEE single, h2 - IEEE double
+
+.align 64
+Constants_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
+data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000 // 24-byte row: Z_2, G_2, H_2, pad, h_2 (two words); index_2 = 0
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116 // index_2 = 1
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
+data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391 // index_2 = 15
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
+
+// G3 and H3 - IEEE single and h3 -IEEE double
+
+.align 64
+Constants_Z_G_H_h3:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
+data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595 // 16-byte row: G_3, H_3, h_3 (two words); index_3 = 0
+data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2 // index_3 = 1
+data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
+data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
+data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
+data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
+data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
+data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
+data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
+data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
+data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
+data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
+data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
+data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
+data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
+data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
+data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
+data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
+data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
+data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
+data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
+data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
+data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
+data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
+data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
+data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
+data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
+data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
+data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
+data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
+data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
+data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17 // index_3 = 31
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+
+//
+// Exponent Thresholds and Tiny Thresholds
+// for 8, 11, 15, and 17 bit exponents
+//
+// Expo_Range Value
+//
+// 0 (8 bits) 2^(-126)
+// 1 (11 bits) 2^(-1022)
+// 2 (15 bits) 2^(-16382)
+// 3 (17 bits) 2^(-16382)
+//
+// Tiny_Table
+// ----------
+// Expo_Range Value
+//
+// 0 (8 bits) 2^(-16382)
+// 1 (11 bits) 2^(-16382)
+// 2 (15 bits) 2^(-16382)
+// 3 (17 bits) 2^(-16382)
+//
+
+.align 64
+Constants_Threshold:
+ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
+data4 0x00000000,0x80000000,0x00003F81,0x00000000 // byte offset 0x00: Threshold = 2^(-126)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // byte offset 0x10: Tiny = 2^(-16382)
+data4 0x00000000,0x80000000,0x00003C01,0x00000000 // byte offset 0x20: Threshold = 2^(-1022)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // byte offset 0x30: Tiny = 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // byte offset 0x40: Threshold = 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // byte offset 0x50: Tiny = 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // byte offset 0x60: Threshold = 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // byte offset 0x70: Tiny = 2^(-16382)
+ASM_SIZE_DIRECTIVE(Constants_Threshold)
+
+.align 64
+Constants_1_by_LN10:
+ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
+data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000 // hi part, approx 0.434294 (loaded into FR_1LN10_hi)
+data4 0xACCF70C8,0xD56EAABE,0x00003FBD,0x00000000 // lo part, exponent 64 below hi (loaded into FR_1LN10_lo)
+ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
+
+FR_Input_X = f8 // argument on entry, result on return
+FR_Neg_One = f9 // holds -1.0
+FR_E = f33
+FR_Em1 = f34
+FR_Y_hi = f34
+// Shared with Em1
+FR_Y_lo = f35
+FR_Scale = f36
+FR_X_Prime = f37 // normalized copy of the argument
+FR_Z = f38
+FR_S_hi = f38
+// Shared with Z
+FR_W = f39
+FR_G = f40
+FR_wsq = f40
+// Shared with G
+FR_H = f41
+FR_w4 = f41
+// Shared with H
+FR_h = f42
+FR_w6 = f42
+// Shared with h
+FR_G_tmp = f43
+FR_poly_lo = f43
+// Shared with G_tmp
+FR_P8 = f43
+// Shared with G_tmp
+FR_H_tmp = f44
+FR_poly_hi = f44
+ // Shared with H_tmp
+FR_P7 = f44
+// Shared with H_tmp
+FR_h_tmp = f45
+FR_rsq = f45
+// Shared with h_tmp
+FR_P6 = f45
+// Shared with h_tmp
+FR_abs_W = f46
+FR_r = f46
+// Shared with abs_W
+FR_AA = f47
+FR_log2_hi = f47
+// Shared with AA
+FR_BB = f48
+FR_log2_lo = f48
+// Shared with BB
+FR_S_lo = f49
+FR_two_negN = f50
+FR_float_N = f51
+FR_Q4 = f52
+FR_dummy = f52
+// Shared with Q4
+FR_P4 = f52
+// Shared with Q4
+FR_Threshold = f52
+// Shared with Q4
+FR_Q3 = f53
+FR_P3 = f53
+// Shared with Q3
+FR_Tiny = f53
+// Shared with Q3
+FR_Q2 = f54
+FR_P2 = f54
+// Shared with Q2
+FR_1LN10_hi = f54
+// Shared with Q2
+FR_Q1 = f55
+FR_P1 = f55
+// Shared with Q1
+FR_1LN10_lo = f55
+// Shared with Q1
+FR_P5 = f98
+FR_SCALE = f98 // Shared with P5
+FR_Output_X_tmp = f99 // provisional result stored for __libm_error_support
+
+GR_Expo_Range = r32 // also the alloc target (receives ar.pfs on entry)
+GR_Table_Base = r34
+GR_Table_Base1 = r35
+GR_Table_ptr = r36
+GR_Index2 = r37
+GR_signif = r38
+GR_X_0 = r39
+GR_X_1 = r40
+GR_X_2 = r41
+GR_Z_1 = r42
+GR_Z_2 = r43
+GR_N = r44
+GR_Bias = r45
+GR_M = r46
+GR_ScaleN = r47
+GR_Index3 = r48
+GR_Perturb = r49
+GR_Table_Scale = r50
+
+
+GR_SAVE_PFS = r51 // ar.pfs saved around the error-handler call
+GR_SAVE_B0 = r52 // b0 saved around the error-handler call
+GR_SAVE_GP = r53 // gp saved around the error-handler call
+
+GR_Parameter_X = r54 // output registers r54-r57: error-handler arguments
+GR_Parameter_Y = r55
+GR_Parameter_RESULT = r56
+
+GR_Parameter_TAG = r57
+
+
+.section .text
+.proc log1pf#
+.global log1pf#
+.align 64
+log1pf:
+#ifdef _LIBC
+.global __log1pf
+__log1pf:
+#endif
+
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0 // 0 inputs, 22 locals (r32-r53), 4 outputs (r54-r57)
+(p0) fsub.s1 FR_Neg_One = f0,f1 // Neg_One = 0 - 1 = -1.0
+(p0) cmp.eq.unc p7, p0 = r0, r0 // p7 = true
+}
+
+{ .mfi
+(p0) cmp.ne.unc p14, p0 = r0, r0 // p14 = false
+(p0) fnorm.s1 FR_X_Prime = FR_Input_X // X_Prime = normalized x
+(p0) cmp.eq.unc p15, p0 = r0, r0 ;; // p15 = true
+}
+
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3 // p6 = x is NaTVal, NaN or +/-Inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF // p10 = x belongs to no class (unsupported)
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0 // p9 = (x == 0)
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd FR_Em1 = f0,f0 // Em1 = 0.0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd FR_E = f0,f1 // E = 1.0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One // p8 = (x == -1)
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One // p13 = (x < -1)
+ nop.i 999
+}
+
+
+L(LOG_BEGIN):
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E // Z = x + E
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_Table_Scale = 0x0000000000000018 ;; // 24 = bytes per row of tables 1 and 2
+}
+
+{ .mmi
+ nop.m 999
+// Setup already performed before L(LOG_BEGIN):
+// Create E = 1 and Em1 = 0
+// Check for X == 0, meaning log(1+0)
+// Check for X < -1, meaning log(negative)
+// Check for X == -1, meaning log(0)
+// Normalize x
+// Identify NatVals, NaNs, Infs.
+// Identify EM unsupporteds.
+// Identify Negative values - use S1 so as
+// not to raise denormal operand exception
+// Set p15 to true for log1pf
+// Set p14 to false for log1pf
+// Set p7 true for log and log1pf
+//
+(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp // GOT slot of table 1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E // AA = max(x, E)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ld8 GR_Table_Base = [GR_Table_Base] // address of table 1
+(p0) fmin.s1 FR_BB = FR_X_Prime, FR_E // BB = min(x, E)
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1 // W = x + Em1
+//
+// Begin load of constants base
+// FR_Z = Z = x + E
+// FR_W = W = x + Em1
+// AA = fmax(x,E)
+// BB = fmin(x,E)
+//
+(p6) br.cond.spnt L(LOG_64_special) ;; // NaTVal, NaN or Inf input
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt L(LOG_64_unsupported) ;; // unsupported encoding
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.spnt L(LOG_64_negative) ;; // x < -1
+}
+
+{ .mib
+(p0) getf.sig GR_signif = FR_Z // signif = significand of Z
+ nop.i 999
+(p9) br.cond.spnt L(LOG_64_one) ;; // x == 0
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(LOG_64_zero) ;; // x == -1
+}
+
+{ .mfi
+(p0) getf.exp GR_N = FR_Z // N = exponent field of Z (still biased)
+//
+// Raise possible denormal operand exception
+// Create Bias
+//
+// This function computes ln( x + e )
+// Input FR 1: FR_X = FR_Input_X
+// Input FR 2: FR_E = FR_E
+// Input FR 3: FR_Em1 = FR_Em1
+// Input GR 1: GR_Expo_Range = GR_Expo_Range = 1
+// Output FR 4: FR_Y_hi
+// Output FR 5: FR_Y_lo
+// Output FR 6: FR_Scale
+// Output PR 7: PR_Safe
+//
+(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z // S_lo = AA - Z
+//
+// signif = getf.sig(Z)
+// abs_W = fabs(w)
+//
+(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;; // Index1 = signif bits 62..59
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmerge.se FR_S_hi = f1,FR_Z // S_hi = sign/exponent of 1.0, significand of Z
+(p0) extr.u GR_X_0 = GR_signif, 49, 15 // X_0 = top 15 significand bits
+}
+
+{ .mmi
+ nop.m 999
+(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp // GOT slot of table 2
+ nop.i 999
+}
+;;
+
+{ .mlx
+ ld8 GR_Table_Base1 = [GR_Table_Base1] // address of table 2
+(p0) movl GR_Bias = 0x000000000000FFFF ;; // register-format exponent bias
+}
+
+{ .mfi
+ nop.m 999
+(p0) fabs FR_abs_W = FR_W // abs_W = |W|
+(p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0 // byte offset = 24 * Index1
+}
+
+{ .mfi
+ nop.m 999
+//
+// Branch out for special input values
+//
+(p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0 // compare under s0; p8 = (x < 0), rewritten by the S_lo test below
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// X_0 = extr.u(signif,49,15)
+// Index1 = extr.u(signif,59,4)
+//
+(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB // S_lo = (AA - Z) + BB
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Offset_to_Z1 = 24 * Index1
+// For performance, don't use result
+// for 3 or 4 cycles.
+//
+(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;; // Table_ptr -> row Index1 of table 1
+}
+//
+// Add Base to Offset for Z1
+// Create Bias
+
+{ .mmi
+(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;; // load Z_1
+(p0) ldfs FR_G = [GR_Table_ptr],4 // load G_1
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) ldfs FR_H = [GR_Table_ptr],8 ;; // load H_1, skip the pad word
+(p0) ldfd FR_h = [GR_Table_ptr],0 // load h_1
+(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // X_1 = (X_0 * Z_1) >> 15
+}
+//
+// Load Z_1
+// Get Base of Table2
+//
+
+{ .mfi
+(p0) getf.exp GR_M = FR_abs_W // M = exponent field of |W|
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// M = getf.exp(abs_W)
+// S_lo = AA - Z
+// X_1 = pmpyshr2(X_0,Z_1,15)
+//
+(p0) sub GR_M = GR_M, GR_Bias ;; // M = unbiased exponent of |W|
+}
+//
+// M = M - Bias
+// Load G1
+// N = getf.exp(Z)
+//
+
+{ .mii
+(p0) cmp.gt.unc p11, p0 = -80, GR_M // p11 = (M < -80)
+(p0) cmp.gt.unc p12, p0 = -7, GR_M ;; // p12 = (M < -7)
+(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;; // Index2 = X_1 bits 9..6
+}
+
+{ .mib
+ nop.m 999
+//
+// if -80 > M, set p11
+// Index2 = extr.u(X_1,6,4)
+// if -7 > M, set p12
+// Load H1
+//
+(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0 // byte offset = 24 * Index2
+(p11) br.cond.spnt L(log1pf_small) ;; // tiny W
+}
+
+{ .mib
+ nop.m 999
+ nop.i 999
+(p12) br.cond.spnt L(log1pf_near) ;; // small W: polynomial in W
+}
+
+{ .mii
+(p0) sub GR_N = GR_N, GR_Bias // N = unbiased exponent of Z
+//
+// poly_lo = r * poly_lo
+//
+(p0) add GR_Perturb = 0x1, r0 ;; // Perturb = 1
+(p0) sub GR_ScaleN = GR_Bias, GR_N // ScaleN = Bias - N
+}
+
+{ .mii
+(p0) setf.sig FR_float_N = GR_N // N as an integer in the significand
+ nop.i 999 ;;
+//
+// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
+// Load h1
+// S_lo = S_lo + BB
+// Branch for -80 > M
+//
+(p0) add GR_Index2 = GR_Index2, GR_Table_Base1 // Index2 -> row of table 2
+}
+
+{ .mmi
+(p0) setf.exp FR_two_negN = GR_ScaleN // two_negN = 2^(-N)
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp // GOT slot of table 3
+};;
+
+//
+// Index2 points to Z2
+// Branch for -7 > M
+//
+
+{ .mmb
+(p0) ld4 GR_Z_2 = [GR_Index2],4 // load Z_2
+ ld8 GR_Table_Base = [GR_Table_Base] // address of table 3
+ nop.b 999 ;;
+}
+(p0) nop.i 999
+//
+// Load Z_2
+// N = N - Bias
+// Tablebase points to Table3
+//
+
+{ .mmi
+(p0) ldfs FR_G_tmp = [GR_Index2],4 ;; // load G_2
+//
+// Load G_2
+// pmpyshr2 X_2= (X_1,Z_2,15)
+// float_N = setf.sig(N)
+// ScaleN = Bias - N
+//
+(p0) ldfs FR_H_tmp = [GR_Index2],8 // load H_2, skip the pad word
+ nop.i 999 ;;
+}
+//
+// Load H_2
+// two_negN = setf.exp(scaleN)
+// G = G_1 * G_2
+//
+
+{ .mfi
+(p0) ldfd FR_h_tmp = [GR_Index2],0 // load h_2
+ nop.f 999
+(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; // X_2 = (X_1 * Z_2) >> 15
+}
+
+{ .mii
+ nop.m 999
+(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;; // Index3 = X_2 bits 5..1
+//
+// Load h_2
+// H = H_1 + H_2
+// h = h_1 + h_2
+// Index3 = extr.u(X_2,1,5)
+//
+(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base // Index3 -> 16-byte row of table 3
+}
+
+{ .mmi
+ nop.m 999
+ nop.m 999
+//
+// float_N = fcvt.xf(float_N)
+// load G3
+//
+(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;; // GOT slot of Constants_Q
+}
+
+{ .mfi
+ld8 GR_Table_Base = [GR_Table_Base] // address of Constants_Q
+nop.f 999
+nop.i 999
+} ;;
+
+{ .mfi
+(p0) ldfe FR_log2_hi = [GR_Table_Base],16 // load log2_hi
+(p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN // S_lo = S_lo * 2^(-N)
+ nop.i 999 ;;
+}
+
+{ .mmf
+ nop.m 999
+//
+// G = G3 * G
+// Load h3
+// Load log2_hi
+// H = H + H3
+//
+(p0) ldfe FR_log2_lo = [GR_Table_Base],16 // load log2_lo
+(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;; // G = G_1 * G_2
+}
+
+{ .mmf
+(p0) ldfs FR_G_tmp = [GR_Index3],4 // load G_3
+//
+// h = h + h3
+// r = G * S_hi - 1
+// Load log2_lo
+//
+(p0) ldfe FR_Q4 = [GR_Table_Base],16 // load Q4
+(p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;; // h = h_1 + h_2
+}
+
+{ .mfi
+(p0) ldfe FR_Q3 = [GR_Table_Base],16 // load Q3
+(p0) fadd.s1 FR_H = FR_H, FR_H_tmp // H = H_1 + H_2
+ nop.i 999 ;;
+}
+
+{ .mmf
+(p0) ldfs FR_H_tmp = [GR_Index3],4 // load H_3
+(p0) ldfe FR_Q2 = [GR_Table_Base],16 // load Q2
+//
+// Compute Index for Table3
+// S_lo = S_lo * two_negN
+//
+(p0) fcvt.xf FR_float_N = FR_float_N ;; // float_N = (float) N
+}
+//
+// If S_lo == 0, set p8 false
+// Load H3
+// Load ptr to table of polynomial coeff.
+//
+
+{ .mmf
+(p0) ldfd FR_h_tmp = [GR_Index3],0 // load h_3
+(p0) ldfe FR_Q1 = [GR_Table_Base],0 // load Q1
+(p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;; // p8 = (S_lo != 0)
+}
+
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp // G = G * G_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_H = FR_H, FR_H_tmp // H = H + H_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1 // r = G * S_hi - 1
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_h = FR_h, FR_h_tmp // h = h + h_3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N * log2_hi + H
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// Load Q4
+// Load Q3
+// Load Q2
+// Load Q1
+//
+(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r // if S_lo != 0: r = G * S_lo + r
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_lo = r * Q4 + Q3
+// rsq = r* r
+//
+(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N * log2_lo + h
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// If (S_lo!=0) r = s_lo * G + r
+//
+(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 // poly_lo = r * Q4 + Q3
+ nop.i 999
+}
+//
+// Create a 0x00000....01
+// poly_lo = poly_lo * rsq + h
+//
+
+{ .mfi
+(p0) setf.sig FR_dummy = GR_Perturb // dummy significand = 1 (f52, overwrites Q4 after its last use)
+(p0) fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// h = N * log2_lo + h
+// Y_hi = n * log2_hi + H
+//
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo * r + Q2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r // poly_hi = Q1 * rsq + r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// poly_lo = r * poly_lo + Q2
+// poly_hi = Q1 * rsq + r
+//
+(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r // poly_lo = poly_lo * r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h // poly_lo = poly_lo * rsq + h
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
+//
+// Create the FR for a binary "or"
+// Y_lo = poly_hi + poly_lo
+//
+// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+// Turn the lsb of Y_lo ON
+//
+// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
+//
+// Merge the new lsb into Y_lo, for alone doesn't
+//
+(p0) br.cond.sptk L(LOG_main) ;; // final rounding and return
+}
+
+
+L(log1pf_near):
+
+{ .mmi
+ nop.m 999
+ nop.m 999
+// /*******************************************************/
+// /*********** Branch log1pf_near ************************/
+// /*******************************************************/
+(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;; // GOT slot of Constants_P
+}
+//
+// Load base address of poly. coeff.
+//
+{.mmi
+ nop.m 999
+ ld8 GR_Table_Base = [GR_Table_Base] // address of Constants_P
+ nop.i 999
+};;
+
+{ .mmb
+(p0) add GR_Table_ptr = 0x40,GR_Table_Base // second pointer starts at the fifth entry (P4)
+//
+// Address tables with separate pointers
+//
+(p0) ldfe FR_P8 = [GR_Table_Base],16 // load P8
+ nop.b 999 ;;
+}
+
+{ .mmb
+(p0) ldfe FR_P4 = [GR_Table_ptr],16 // load P4
+//
+// Load P4
+// Load P8
+//
+(p0) ldfe FR_P7 = [GR_Table_Base],16 // load P7
+ nop.b 999 ;;
+}
+
+{ .mmf
+(p0) ldfe FR_P3 = [GR_Table_ptr],16 // load P3
+//
+// Load P3
+// Load P7
+//
+(p0) ldfe FR_P6 = [GR_Table_Base],16 // load P6
+(p0) fmpy.s1 FR_wsq = FR_W, FR_W ;; // wsq = W * W
+}
+
+{ .mfi
+(p0) ldfe FR_P2 = [GR_Table_ptr],16 // load P2
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3 // Y_hi = W * P4 + P3
+ nop.i 999
+}
+//
+// Load P2
+// Load P6
+// Wsq = w * w
+// Y_hi = p4 * w + p3
+//
+
+{ .mfi
+(p0) ldfe FR_P5 = [GR_Table_Base],16 // load P5
+(p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7 // Y_lo = W * P8 + P7
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p0) ldfe FR_P1 = [GR_Table_ptr],16 // load P1
+//
+// Load P1
+// Load P5
+// Y_lo = p8 * w + P7
+//
+(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq // w4 = wsq * wsq
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2 // Y_hi = W * Y_hi + P2
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6 // Y_lo = W * Y_lo + P6
+(p0) add GR_Perturb = 0x1, r0 ;; // Perturb = 1
+}
+
+{ .mfi
+ nop.m 999
+//
+// w4 = w2 * w2
+// Y_hi = y_hi * w + p2
+// Y_lo = y_lo * w + p6
+// Create perturbation bit
+//
+(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq // w6 = w4 * wsq
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1 // Y_hi = W * Y_hi + P1
+ nop.i 999
+}
+//
+// Y_hi = y_hi * w + p1
+// w6 = w4 * w2
+//
+
+{ .mfi
+(p0) setf.sig FR_Q4 = GR_Perturb // f52 significand = 1 (P4 in f52 is no longer needed)
+(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5 // Y_lo = W * Y_lo + P5
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W // Y_hi = wsq * Y_hi + W
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+//
+// Y_hi = y_hi * wsq + w
+// Y_lo = y_lo * w + p5
+//
+(p0) fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo // Y_lo = w6 * Y_lo
+//
+// Y_lo = y_lo * w6
+//
+// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+// Set lsb on: Taken out to improve performance
+//
+// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
+//
+// Make sure it's on in Y_lo also. Taken out to improve
+// performance
+//
+(p0) br.cond.sptk L(LOG_main) ;; // final rounding and return
+}
+
+
+L(log1pf_small):
+
+{ .mmi
+ nop.m 999
+ nop.m 999
+// /*******************************************************/
+// /*********** Branch log1pf_small ***********************/
+// /*******************************************************/
+(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp // GOT slot of Constants_Threshold
+}
+
+{ .mfi
+ nop.m 999
+(p0) mov FR_Em1 = FR_W // f34 is shared with Y_hi, so this sets Y_hi = W
+(p0) cmp.eq.unc p7, p0 = r0, r0 ;; // p7 = true (Safe)
+}
+
+{ .mlx
+ ld8 GR_Table_Base = [GR_Table_Base] // address of Constants_Threshold
+(p0) movl GR_Expo_Range = 0x0000000000000002 ;; // NOTE(review): 2 selects the 2^(-1022) row; the note below says 0 is for single - confirm intent
+}
+//
+// Set Safe to true
+// Set Expo_Range = 0 for single
+// Set Expo_Range = 2 for double
+// Set Expo_Range = 4 for double-extended
+//
+
+{ .mmi
+(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;; // Table_Base += 16 * Expo_Range
+(p0) ldfe FR_Threshold = [GR_Table_Base],16 // load Threshold
+ nop.i 999
+}
+
+{ .mlx
+ nop.m 999
+(p0) movl GR_Bias = 0x000000000000FF9B ;; // exponent field 0xFFFF - 100, i.e. 2^(-100)
+}
+
+{ .mfi
+(p0) ldfe FR_Tiny = [GR_Table_Base],0 // load Tiny (entry following Threshold)
+ nop.f 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold // p13 = (|W| > Threshold), p12 = otherwise
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W // p13: Y_lo = -(W * W)
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p13) fadd FR_SCALE = f0, f1 // p13: SCALE = 1.0
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny // p12: Y_lo = -Tiny
+(p12) cmp.ne.unc p7, p0 = r0, r0 // p12: p7 = false (not Safe)
+}
+
+{ .mfi
+(p12) setf.exp FR_SCALE = GR_Bias // p12: SCALE = 2^(-100)
+ nop.f 999
+ nop.i 999 ;;
+}
+
+//
+// Set p7 to SAFE = FALSE
+// Set Scale = 2^-100
+//
+{ .mfb
+ nop.m 999
+(p0) fma.s.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi // result = Y_lo * SCALE + Y_hi, rounded to single
+(p0) br.ret.sptk b0 // return directly; LOG_main is not used on this path
+}
+;;
+
+L(LOG_64_one):
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.s.s0 FR_Input_X = FR_Input_X, f0 // x == 0: result = x * 0, keeping the sign of x
+(p0) br.ret.sptk b0 // return
+}
+;;
+//
+// Raise divide by zero for -1 input, i.e. ln(0).
+//
+
+L(LOG_64_zero):
+
+{ .mfi
+(p0) mov GR_Parameter_TAG = 142 // error tag passed to __libm_error_support
+//
+// If we have log1pf(-1), return -Inf.
+//
+(p0) fsub.s0 FR_Output_X_tmp = f0, f1 // tmp = -1.0
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 // tmp = -1/0 under s0
+(p0) br.cond.sptk L(LOG_ERROR_Support) ;; // report through the error handler
+}
+
+L(LOG_64_special):
+
+{ .mfi
+ nop.m 999
+//
+// p7 = input is NaTVal, QNaN, SNaN or +Inf.
+//
+(p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1 // same classes as the entry test, positive sign only for Inf
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+//
+// Check for Natval, QNan, SNaN, +Inf
+//
+(p7) fmpy.s.s0 f8 = FR_Input_X, f1 // result = x * 1
+//
+// For SNaN raise invalid and return QNaN.
+// For QNaN raise invalid and return QNaN.
+// For +Inf return +Inf.
+//
+(p7) br.ret.sptk b0 // return for those inputs
+}
+;;
+
+//
+// For -Inf raise invalid and return QNaN.
+//
+
+{ .mfb
+(p0) mov GR_Parameter_TAG = 143 // error tag passed to __libm_error_support
+(p0) fmpy.s.s0 FR_Output_X_tmp = FR_Input_X, f0 // tmp = x * 0 with x = -Inf
+(p0) br.cond.sptk L(LOG_ERROR_Support) ;; // report through the error handler
+}
+
+//
+// Report that log1pf(-Inf) computed
+//
+
+L(LOG_64_unsupported):
+
+//
+// Return generated NaN or other value .
+//
+
+{ .mfb
+ nop.m 999
+(p0) fmpy.s.s0 FR_Input_X = FR_Input_X, f0 // result = x * 0 for an unsupported encoding
+(p0) br.ret.sptk b0 ;; // return
+}
+
+L(LOG_64_negative):
+
+{ .mfi
+ nop.m 999
+//
+// Reached for x < -1, i.e. ln of a negative number.
+//
+(p0) frcpa.s0 FR_Output_X_tmp, p8 = f0, f0 // tmp = 0/0 under s0
+//
+// Deal with x < 0 in a special way - raise
+// invalid and produce QNaN indefinite.
+//
+(p0) mov GR_Parameter_TAG = 143;; // error tag; no branch: falls through into L(LOG_ERROR_Support)
+}
+
+.endp log1pf#
+ASM_SIZE_DIRECTIVE(log1pf)
+
+.proc __libm_error_region
+__libm_error_region:
+L(LOG_ERROR_Support):
+.prologue
+
+// (1) Save ar.pfs and gp, open a 64-byte stack frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32 = new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Store a zero second argument, save b0
+{ .mmi
+ stfs [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (0.0) on stack, advance to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store x and the provisional result, call the handler
+{ .mib
+ stfs [GR_Parameter_X] =FR_Input_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_Output_X_tmp // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+};;
+
+// (4) Fetch the result, tear down the frame, return to log1pf's caller
+{ .mmi
+ ldfs FR_Input_X = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.proc __libm_LOG_main
+__libm_LOG_main:
+L(LOG_main):
+
+// Common exit for the regular and near-1 paths: kernel_log_64 has
+// produced (Y_hi, Y_lo) approximating ln(X + E).  Combine the pair
+// into FR_Input_X, rounded to single under s0, and return.
+
+{ .mfi
+ nop.m 999
+(p7) fadd.s.s0 FR_Input_X = FR_Y_lo,FR_Y_hi // if p7: result = Y_lo + Y_hi
+ nop.i 999
+}
+
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;; // if p14 (cleared at log1pf entry): GOT slot of Constants_1_by_LN10
+}
+
+{ .mmi
+ nop.m 999
+(p14) ld8 GR_Table_Base = [GR_Table_Base] // if p14: table address
+ nop.i 999
+};;
+
+{ .mmi
+(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;; // first table entry (hi part)
+(p14) ldfe FR_1LN10_lo = [GR_Table_Base] // second table entry (lo part)
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi // tmp = Y_lo * hi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp // tmp += Y_hi * lo
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p14) fma.s.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp // if p14: result = Y_hi * hi + tmp
+(p0) br.ret.sptk b0 ;; // return unconditionally
+}
+.endp __libm_LOG_main
+ASM_SIZE_DIRECTIVE(__libm_LOG_main)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_log1pl.S b/sysdeps/ia64/fpu/s_log1pl.S
new file mode 100644
index 0000000..54ef807
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_log1pl.S
@@ -0,0 +1,1663 @@
+.file "log1pl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+// 2/02/00 hand-optimized
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// *********************************************************************
+//
+// *********************************************************************
+//
+// Function: Combined logl(x), log1pl(x), and log10l(x) where
+// logl(x) = ln(x), for double-extended precision x values
+// log1pl(x) = ln(x+1), for double-extended precision x values
+// log10l(x) = log (x), for double-extended precision x values
+// 10
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9,f33-f55,f98,f99
+//
+// General Purpose Registers:
+// r32-r53
+// r54-r57 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions cannot occur
+// Underflow exceptions raised when appropriate for log1p
+// (Error Handling Routine called for underflow)
+// Inexact raised when appropriate by algorithm
+//
+// logl(inf) = inf
+// logl(-inf) = QNaN
+// logl(+/-0) = -inf
+// logl(SNaN) = QNaN
+// logl(QNaN) = QNaN
+// logl(EM_special Values) = QNaN
+// log1pl(inf) = inf
+// log1pl(-inf) = QNaN
+// log1pl(+/-0) = +/-0
+// log1pl(-1) = -inf
+// log1pl(SNaN) = QNaN
+// log1pl(QNaN) = QNaN
+// log1pl(EM_special Values) = QNaN
+// log10l(inf) = inf
+// log10l(-inf) = QNaN
+// log10l(+/-0) = -inf
+// log10l(SNaN) = QNaN
+// log10l(QNaN) = QNaN
+// log10l(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Computation is based on the following kernel.
+//
+// ker_log_64( in_FR : X,
+// in_FR : E,
+// in_FR : Em1,
+// in_GR : Expo_Range,
+// out_FR : Y_hi,
+// out_FR : Y_lo,
+// out_FR : Scale,
+// out_PR : Safe )
+//
+// Overview
+//
+// The method consists of three cases.
+//
+// If |X+Em1| < 2^(-80) use case log1pl_small;
+// elseif |X+Em1| < 2^(-7) use case log_near1;
+// else use case log_regular;
+//
+// Case log1pl_small:
+//
+// logl( 1 + (X+Em1) ) can be approximated by (X+Em1).
+//
+// Case log_near1:
+//
+// logl( 1 + (X+Em1) ) can be approximated by a simple polynomial
+// in W = X+Em1. This polynomial resembles the truncated Taylor
+// series W - W^2/2 + W^3/3 - ...
+//
+// Case log_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute logl(Arg) for an argument Arg in [1,2), we
+// construct a value G such that G*Arg is close to 1 and that
+// logl(1/G) is obtainable easily from a table of values calculated
+// beforehand. Thus
+//
+// logl(Arg) = logl(1/G) + logl(G*Arg)
+// = logl(1/G) + logl(1 + (G*Arg - 1))
+//
+// Because |G*Arg - 1| is small, the second term on the right hand
+// side can be approximated by a short polynomial. We elaborate
+// this method in four steps.
+//
+// Step 0: Initialization
+//
+// We need to calculate logl( E + X ). Obtain N, S_hi, S_lo such that
+//
+// E + X = 2^N * ( S_hi + S_lo ) exactly
+//
+// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+// that |S_lo| <= ulp(S_hi).
+//
+// Step 1: Argument Reduction
+//
+// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
+//
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1) + G * S_lo
+//
+// These G_j's have the property that the product is exactly
+// representable and that |r| < 2^(-12) as a result.
+//
+// Step 2: Approximation
+//
+//
+// logl(1 + r) is approximated by a short polynomial poly(r).
+//
+// Step 3: Reconstruction
+//
+//
+// Finally, logl( E + X ) is given by
+//
+// logl( E + X ) = logl( 2^N * (S_hi + S_lo) )
+// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
+// ~=~ N*logl(2) + logl(1/G) + poly(r).
+//
+// **** Algorithm ****
+//
+// Case log1pl_small:
+//
+// Although logl(1 + (X+Em1)) is basically X+Em1, we would like to
+// preserve the inexactness nature as well as consistent behavior
+// under different rounding modes. Note that this case can only be
+// taken if E is set to be 1.0. In this case, Em1 is zero, and that
+// X can be very tiny and thus the final result can possibly underflow.
+// Thus, we compare X against a threshold that is dependent on the
+// input Expo_Range. If |X| is smaller than this threshold, we set
+// SAFE to be FALSE.
+//
+// The result is returned as Y_hi, Y_lo, and in the case of SAFE
+// is FALSE, an additional value Scale is also returned.
+//
+// W := X + Em1
+// Threshold := Threshold_Table( Expo_Range )
+// Tiny := Tiny_Table( Expo_Range )
+//
+// If ( |W| > Threshold ) then
+// Y_hi := W
+// Y_lo := -W*W
+// Else
+// Y_hi := W
+// Y_lo := -Tiny
+// Scale := 2^(-100)
+// Safe := FALSE
+// EndIf
+//
+//
+// One may think that Y_lo should be -W*W/2; however, it does not matter
+// as Y_lo will be rounded off completely except for the correct effect in
+// directed rounding. Clearly -W*W is simpler to compute. Moreover,
+// because of the difference in exponent value, Y_hi + Y_lo or
+// Y_hi + Scale*Y_lo is always inexact.
+//
+// Case log_near1:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into two portions.
+//
+// W := X + Em1
+// Wsq := W * W
+// W4 := Wsq*Wsq
+// W6 := W4*Wsq
+// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+// set lsb(Y_lo) to be 1
+//
+// Case log_regular:
+//
+// We present the algorithm in four steps.
+//
+// Step 0. Initialization
+// ----------------------
+//
+// Z := X + E
+// N := unbiased exponent of Z
+// S_hi := 2^(-N) * Z
+// S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
+//
+// Note that S_lo is always 0 for the case E = 0.
+//
+// Step 1. Argument Reduction
+// --------------------------
+//
+// Let
+//
+// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//
+// We obtain G_1, G_2, G_3 by the following steps.
+//
+//
+// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
+// from S_hi.
+//
+// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+// to lsb = 2^(-4).
+//
+// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+//
+// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
+// fixed point lsb = 2^(-15).
+// Z_1 looks like z_0.z_1 z_2 ... z_15
+// Note that the fetching is done using index_1.
+// A_1 is actually not needed in the implementation
+// and is used here only to explain how the value
+// Z_1 is defined.
+//
+// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
+// floating pt. Again, fetching is done using index_1. A_1
+// explains how G_1 is defined.
+//
+// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 d_5 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_1 indeed always begins
+// with 1.0000 in fixed point.
+//
+//
+// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
+// truncated to lsb = 2^(-8). Similar to A_1,
+// A_2 is not needed in actual implementation. It
+// helps explain how some of the values are defined.
+//
+// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+//
+// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
+// fixed point lsb = 2^(-15). Fetch done using index_2.
+// Z_2 looks like z_0.z_1 z_2 ... z_15
+//
+// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
+// floating pt.
+//
+// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_2 indeed always begins
+// with 1.00000000 in fixed point.
+//
+//
+// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//
+// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//
+// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
+// floating pt. Fetch is done using index_3.
+//
+// Compute G := G_1 * G_2 * G_3.
+//
+// This is done exactly since each of G_j only has 21 sig. bits.
+//
+// Compute
+//
+// r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+//
+// thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of
+// rounding errors.
+//
+//
+// Step 2. Approximation
+// ---------------------
+//
+// This step computes an approximation to logl( 1 + r ) where r is the
+// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
+// thus logl(1+r) can be approximated by a short polynomial:
+//
+// logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//
+//
+// Step 3. Reconstruction
+// ----------------------
+//
+// This step computes the desired result of logl(X+E):
+//
+// logl(X+E) = logl( 2^N * (S_hi + S_lo) )
+// = N*logl(2) + logl( S_hi + S_lo )
+// = N*logl(2) + logl(1/G) +
+// logl(1 + G*(S_hi+S_lo) - 1 )
+//
+// logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
+// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
+// single-precision numbers and the low parts are double precision
+// numbers. These have the property that
+//
+// N*log2_hi + SUM ( log1byGj_hi )
+//
+// is computable exactly in double-extended precision (64 sig. bits).
+// Finally
+//
+// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+// Y_lo := poly_hi + [ poly_lo +
+// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+// set lsb(Y_lo) to be 1
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 (near-1 polynomial coefficients, 16 bytes each)
+
+.align 64
+Constants_P: // read with ldfe in the log1pl_near path: P_8.. from offset 0, P_4.. from offset 0x40
+ASM_TYPE_DIRECTIVE(Constants_P,@object)
+data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000 // P_8 ~ 1/9 (words: significand low, significand high, sign/exponent, pad)
+data4 0xA5E56381,0x8003B271,0x0000BFFC,0x00000000 // P_7 ~ -1/8
+data4 0x73282DB0,0x9249248C,0x00003FFC,0x00000000 // P_6 ~ 1/7
+data4 0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000 // P_5 ~ -1/6
+data4 0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000 // P_4 ~ 1/5
+data4 0x00067ED5,0x80000000,0x0000BFFD,0x00000000 // P_3 ~ -1/4
+data4 0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000 // P_2 ~ 1/3
+data4 0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000 // P_1 ~ -1/2
+ASM_SIZE_DIRECTIVE(Constants_P)
+
+// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 (16 bytes each, in the order the regular path loads them)
+
+.align 64
+Constants_Q: // read sequentially with ldfe ...,16 in the log_regular path
+ASM_TYPE_DIRECTIVE(Constants_Q,@object)
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 // log2_hi ~ 0.693147 (only the top significand bits are set)
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 // log2_lo: small negative correction, about -2^(-29)
+data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 // Q_4 ~ 1/5
+data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 // Q_3 ~ -1/4
+data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 // Q_2 ~ 1/3
+data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 // Q_1 ~ -1/2
+ASM_SIZE_DIRECTIVE(Constants_Q)
+
+// Z1 - 16 bit fixed, G1 and H1 - IEEE single, 4 bytes of padding, h1 - IEEE double (24 bytes per row)
+
+.align 64
+Constants_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
+data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
+data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
+data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
+data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
+data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
+data4 0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
+
+// Z2 - 16 bit fixed, G2 and H2 - IEEE single, 4 bytes of padding, h2 - IEEE double (24 bytes per row)
+
+.align 64
+Constants_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
+data4 0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
+data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
+
+// G3 and H3 - IEEE single, and h3 - IEEE double (16 bytes per row)
+
+.align 64
+Constants_Z_G_H_h3:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
+data4 0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
+data4 0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
+data4 0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
+data4 0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
+data4 0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
+data4 0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
+data4 0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
+data4 0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
+data4 0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
+data4 0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
+data4 0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
+data4 0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
+data4 0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
+data4 0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
+data4 0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
+data4 0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
+data4 0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
+data4 0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
+data4 0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
+data4 0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
+data4 0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
+data4 0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
+data4 0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
+data4 0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
+data4 0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
+data4 0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
+data4 0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
+data4 0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
+data4 0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
+data4 0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
+data4 0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
+data4 0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+
+//
+// Exponent Thresholds and Tiny Thresholds
+// for 8, 11, 15, and 17 bit exponents
+// Threshold_Table (its rows alternate with the Tiny_Table rows below)
+// Expo_Range Value
+//
+// 0 (8 bits) 2^(-126)
+// 1 (11 bits) 2^(-1022)
+// 2 (15 bits) 2^(-16382)
+// 3 (17 bits) 2^(-16382)
+//
+// Tiny_Table
+// ----------
+// Expo_Range Value
+//
+// 0 (8 bits) 2^(-16382)
+// 1 (11 bits) 2^(-16382)
+// 2 (15 bits) 2^(-16382)
+// 3 (17 bits) 2^(-16382)
+//
+
+.align 64
+Constants_Threshold: // log1pl_small indexes this by GR_Expo_Range * 16, then reads Threshold and the following Tiny
+ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
+data4 0x00000000,0x80000000,0x00003F81,0x00000000 // Threshold,  8-bit exponent: 2^(-126)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // Tiny,       8-bit exponent: 2^(-16382)
+data4 0x00000000,0x80000000,0x00003C01,0x00000000 // Threshold, 11-bit exponent: 2^(-1022)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // Tiny,      11-bit exponent: 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // Threshold, 15-bit exponent: 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // Tiny,      15-bit exponent: 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // Threshold, 17-bit exponent: 2^(-16382)
+data4 0x00000000,0x80000000,0x00000001,0x00000000 // Tiny,      17-bit exponent: 2^(-16382)
+ASM_SIZE_DIRECTIVE(Constants_Threshold)
+
+.align 64
+Constants_1_by_LN10: // 1/ln(10) as a high/low pair of 16-byte extended-precision entries
+ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
+data4 0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000 // high part ~ 0.434294 (words: significand low, significand high, sign/exponent, pad)
+data4 0xACCF70C8,0xD56EAABE,0x00003FBB,0x00000000 // low part: positive correction with exponent field 0x3FBB
+ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
+
+FR_Input_X = f8 // argument on entry, result on return
+FR_Neg_One = f9 // -1.0, built by log1pl as f0 - f1
+FR_E = f33 // E: 0 for logl and log10l, 1 for log1pl
+FR_Em1 = f34 // E - 1: -1 for logl and log10l, 0 for log1pl
+FR_Y_hi = f34
+// FR_Y_hi reuses f34 (shared with FR_Em1)
+FR_Y_lo = f35
+FR_Scale = f36
+FR_X_Prime = f37 // fnorm.s1 copy of the input
+FR_Z = f38 // Z = X_Prime + E
+FR_S_hi = f38
+// FR_S_hi reuses f38 (shared with FR_Z)
+FR_W = f39 // W = X_Prime + Em1
+FR_G = f40
+FR_wsq = f40
+// FR_wsq reuses f40 (shared with FR_G)
+FR_H = f41
+FR_w4 = f41
+// FR_w4 reuses f41 (shared with FR_H)
+FR_h = f42
+FR_w6 = f42
+// FR_w6 reuses f42 (shared with FR_h)
+FR_G_tmp = f43
+FR_poly_lo = f43
+// FR_poly_lo reuses f43 (shared with FR_G_tmp)
+FR_P8 = f43
+// FR_P8 reuses f43 (shared with FR_G_tmp)
+FR_H_tmp = f44
+FR_poly_hi = f44
+ // FR_poly_hi reuses f44 (shared with FR_H_tmp)
+FR_P7 = f44
+// FR_P7 reuses f44 (shared with FR_H_tmp)
+FR_h_tmp = f45
+FR_rsq = f45
+// FR_rsq reuses f45 (shared with FR_h_tmp)
+FR_P6 = f45
+// FR_P6 reuses f45 (shared with FR_h_tmp)
+FR_abs_W = f46
+FR_r = f46
+// FR_r reuses f46 (shared with FR_abs_W)
+FR_AA = f47 // fmax(X_Prime, E)
+FR_log2_hi = f47
+// FR_log2_hi reuses f47 (shared with FR_AA)
+FR_BB = f48 // fmin(X_Prime, E)
+FR_log2_lo = f48
+// FR_log2_lo reuses f48 (shared with FR_BB)
+FR_S_lo = f49
+FR_two_negN = f50 // built with setf.exp from GR_ScaleN = Bias - N
+FR_float_N = f51 // N, converted with fcvt.xf
+FR_Q4 = f52
+FR_dummy = f52
+// FR_dummy reuses f52 (shared with FR_Q4)
+FR_P4 = f52
+// FR_P4 reuses f52 (shared with FR_Q4)
+FR_Threshold = f52
+// FR_Threshold reuses f52 (shared with FR_Q4)
+FR_Q3 = f53
+FR_P3 = f53
+// FR_P3 reuses f53 (shared with FR_Q3)
+FR_Tiny = f53
+// FR_Tiny reuses f53 (shared with FR_Q3)
+FR_Q2 = f54
+FR_P2 = f54
+// FR_P2 reuses f54 (shared with FR_Q2)
+FR_1LN10_hi = f54
+// FR_1LN10_hi reuses f54 (shared with FR_Q2)
+FR_Q1 = f55
+FR_P1 = f55
+// FR_P1 reuses f55 (shared with FR_Q1)
+FR_1LN10_lo = f55
+// FR_1LN10_lo reuses f55 (shared with FR_Q1)
+FR_P5 = f98
+FR_SCALE = f98 // same register as FR_P5
+FR_Output_X_tmp = f99
+
+GR_Expo_Range = r32 // r32 is also the alloc destination at each entry point
+GR_Table_Base = r34
+GR_Table_Base1 = r35
+GR_Table_ptr = r36
+GR_Index2 = r37
+GR_signif = r38 // significand of Z (getf.sig)
+GR_X_0 = r39
+GR_X_1 = r40
+GR_X_2 = r41
+GR_Z_1 = r42
+GR_Z_2 = r43
+GR_N = r44 // exponent of Z (getf.exp), later minus GR_Bias
+GR_Bias = r45 // 0xFFFF in the regular path, 0xFF9B in log1pl_small
+GR_M = r46 // exponent of |W| minus GR_Bias
+GR_ScaleN = r47
+GR_Index3 = r48
+GR_Perturb = r49 // set to 1
+GR_Table_Scale = r50 // 0x18 = 24, the row size used to index the Z/G/H/h tables 1 and 2
+
+//
+// Added for unwind support
+//
+
+GR_SAVE_PFS = r51
+GR_SAVE_B0 = r52
+GR_SAVE_GP = r53
+GR_Parameter_X = r54 // r54-r57 are the four output registers reserved by alloc
+GR_Parameter_Y = r55
+GR_Parameter_RESULT = r56
+GR_Parameter_TAG = r57
+
+FR_X = f8 // same register as FR_Input_X
+FR_Y = f0
+FR_RESULT = f99 // same register as FR_Output_X_tmp
+
+.section .text
+.proc logl#
+.global logl#
+.align 64
+logl: // entry for logl(x) = ln(x): classify x, set E = 0 and Em1 = -1, then join L(LOGL_BEGIN)
+#ifdef _LIBC
+.global __ieee754_logl
+__ieee754_logl:
+#endif
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0 // 22 locals (r32-r53), 4 outputs (r54-r57); previous ar.pfs is copied to r32
+(p0) fnorm.s1 FR_X_Prime = FR_Input_X // X_Prime = normalized copy of the argument
+(p0) cmp.eq.unc p7, p0 = r0, r0 // p7 = true (r0 == r0)
+}
+{ .mfi
+(p0) cmp.ne.unc p14, p0 = r0, r0 // p14 = false (r0 != r0)
+(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3 // p6 = x is a NatVal, NaN or Inf
+(p0) cmp.ne.unc p15, p0 = r0, r0 ;; // p15 = false
+}
+{ .mfi
+ nop.m 0
+(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF // p10 = x matches no class (EM unsupported)
+ nop.i 0
+}
+{ .mfi
+nop.m 999
+(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, f0 // p8 = (x == 0)
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, f0 // p13 = (x < 0)
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f1 // p9 = (x == 1)
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fsub.s1 FR_Em1 = f0,f1 // Em1 = 0 - 1 = -1
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p0) fadd FR_E = f0,f0 // E = 0 + 0 = 0
+//
+// Summary of the logl entry setup performed above:
+// E = 0 and Em1 = -1
+// p9 <- X == 1, meaning logl(1)
+// p13 <- X < 0, meaning logl(negative)
+// p8 <- X == 0, meaning logl(0)
+// p6 <- NatVals, NaNs, Infs (fclass.m mask 0x1E3)
+// p10 <- EM unsupporteds (fclass.nm mask 0x1FF)
+// Negative values are identified using S1 so as
+// not to raise denormal operand exception
+// p15 and p14 are set to false for log
+// p7 is set to true for log and log1p
+//
+(p0) br.cond.sptk L(LOGL_BEGIN) ;; // continue in the shared code at L(LOGL_BEGIN)
+}
+
+.endp logl
+ASM_SIZE_DIRECTIVE(logl)
+
+.section .text
+.proc log10l#
+.global log10l#
+.align 64
+log10l: // entry for log10l(x): classify x, set E = 0 and Em1 = -1, flag p14, then join L(LOGL_BEGIN)
+#ifdef _LIBC
+.global __ieee754_log10l
+__ieee754_log10l:
+#endif
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0 // 22 locals (r32-r53), 4 outputs (r54-r57); previous ar.pfs is copied to r32
+(p0) fadd FR_E = f0,f0 // E = 0 + 0 = 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p0) fsub.s1 FR_Em1 = f0,f1 // Em1 = 0 - 1 = -1
+ nop.i 0
+}
+{ .mfi
+(p0) cmp.ne.unc p15, p0 = r0, r0 // p15 = false (r0 != r0)
+(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f1 // p9 = (x == 1)
+ nop.i 0
+}
+{ .mfi
+(p0) cmp.eq.unc p14, p0 = r0, r0 // p14 = true (r0 == r0)
+(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, f0 // p13 = (x < 0)
+(p0) cmp.ne.unc p7, p0 = r0, r0 ;; // p7 = false
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, f0 // p8 = (x == 0)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF // p10 = x matches no class (EM unsupported)
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3 // p6 = x is a NatVal, NaN or Inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p0) fnorm.s1 FR_X_Prime = FR_Input_X // X_Prime = normalized copy of the argument
+//
+// Summary of the log10l entry setup performed above:
+// E = 0 and Em1 = -1
+// p9 <- X == 1, meaning log10l(1)
+// p13 <- X < 0, meaning log10l(negative)
+// p8 <- X == 0, meaning log10l(0)
+// p6 <- NatVals, NaNs, Infs (fclass.m mask 0x1E3)
+// p10 <- EM unsupporteds (fclass.nm mask 0x1FF)
+// Negative values are identified using S1 so as
+// not to raise denormal operand exception
+// p15 is set to false for log10
+// p14 is set to true for log10
+// p7 is set to false for log10
+//
+(p0) br.cond.sptk L(LOGL_BEGIN) ;; // continue in the shared code at L(LOGL_BEGIN)
+}
+
+.endp log10l
+ASM_SIZE_DIRECTIVE(log10l)
+
+.section .text
+.proc log1pl#
+.global log1pl#
+.align 64
+log1pl:
+#ifdef _LIBC
+.global __log1pl
+__log1pl:
+#endif
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0
+(p0) fsub.s1 FR_Neg_One = f0,f1
+(p0) cmp.eq.unc p7, p0 = r0, r0
+}
+{ .mfi
+(p0) cmp.ne.unc p14, p0 = r0, r0
+(p0) fnorm.s1 FR_X_Prime = FR_Input_X
+(p0) cmp.eq.unc p15, p0 = r0, r0 ;;
+}
+{ .mfi
+ nop.m 0
+(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p0) fadd FR_Em1 = f0,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd FR_E = f0,f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One
+ nop.i 999
+}
+L(LOGL_BEGIN):
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Table_Scale = 0x0000000000000018 ;;
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+//
+// Create E = 1 and Em1 = 0
+// Check for X == 0, meaning logl(1+0)
+// Check for X < -1, meaning logl(negative)
+// Check for X == -1, meaning logl(0)
+// Normalize x
+// Identify NatVals, NaNs, Infs.
+// Identify EM unsupporteds.
+// Identify Negative values - us S1 so as
+// not to raise denormal operand exception
+// Set p15 to true for log1p
+// Set p14 to false for log1p
+// Set p7 true for log and log1p
+//
+(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
+}
+{ .mfi
+ nop.m 999
+(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E
+ nop.i 999 ;;
+}
+{ .mfi
+ ld8 GR_Table_Base = [GR_Table_Base]
+(p0) fmin.s1 FR_BB = FR_X_Prime, FR_E
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1
+//
+// Begin load of constants base
+// FR_Z = Z = |x| + E
+// FR_W = W = |x| + Em1
+// AA = fmax(|x|,E)
+// BB = fmin(|x|,E)
+//
+(p6) br.cond.spnt L(LOGL_64_special) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p10) br.cond.spnt L(LOGL_64_unsupported) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p13) br.cond.spnt L(LOGL_64_negative) ;;
+}
+{ .mib
+(p0) getf.sig GR_signif = FR_Z
+ nop.i 999
+(p9) br.cond.spnt L(LOGL_64_one) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.spnt L(LOGL_64_zero) ;;
+}
+{ .mfi
+(p0) getf.exp GR_N = FR_Z
+//
+// Raise possible denormal operand exception
+// Create Bias
+//
+// This function computes ln( x + e )
+// Input FR 1: FR_X = FR_Input_X
+// Input FR 2: FR_E = FR_E
+// Input FR 3: FR_Em1 = FR_Em1
+// Input GR 1: GR_Expo_Range = GR_Expo_Range = 1
+// Output FR 4: FR_Y_hi
+// Output FR 5: FR_Y_lo
+// Output FR 6: FR_Scale
+// Output PR 7: PR_Safe
+//
+(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z
+//
+// signif = getf.sig(Z)
+// abs_W = fabs(w)
+//
+(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmerge.se FR_S_hi = f1,FR_Z
+(p0) extr.u GR_X_0 = GR_signif, 49, 15
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp ;;
+}
+{ .mlx
+ ld8 GR_Table_Base1 = [GR_Table_Base1]
+(p0) movl GR_Bias = 0x000000000000FFFF ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fabs FR_abs_W = FR_W
+(p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0
+}
+{ .mfi
+ nop.m 999
+//
+// Branch out for special input values
+//
+(p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// X_0 = extr.u(signif,49,15)
+// Index1 = extr.u(signif,59,4)
+//
+(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// Offset_to_Z1 = 24 * Index1
+// For performance, don't use result
+// for 3 or 4 cycles.
+//
+(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;;
+}
+//
+// Add Base to Offset for Z1
+// Create Bias
+{ .mmi
+(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;;
+(p0) ldfs FR_G = [GR_Table_ptr],4
+ nop.i 999 ;;
+}
+{ .mmi
+(p0) ldfs FR_H = [GR_Table_ptr],8 ;;
+(p0) ldfd FR_h = [GR_Table_ptr],0
+(p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15
+}
+//
+// Load Z_1
+// Get Base of Table2
+//
+{ .mfi
+(p0) getf.exp GR_M = FR_abs_W
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+ nop.i 999 ;;
+//
+// M = getf.exp(abs_W)
+// S_lo = AA - Z
+// X_1 = pmpyshr2(X_0,Z_1,15)
+//
+(p0) sub GR_M = GR_M, GR_Bias ;;
+}
+//
+// M = M - Bias
+// Load G1
+// N = getf.exp(Z)
+//
+{ .mii
+(p0) cmp.gt.unc p11, p0 = -80, GR_M
+(p0) cmp.gt.unc p12, p0 = -7, GR_M ;;
+(p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;;
+}
+{ .mib
+ nop.m 999
+//
+// if -80 > M, set p11
+// Index2 = extr.u(X_1,6,4)
+// if -7 > M, set p12
+// Load H1
+//
+(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0
+(p11) br.cond.spnt L(log1pl_small) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p12) br.cond.spnt L(log1pl_near) ;;
+}
+{ .mii
+(p0) sub GR_N = GR_N, GR_Bias
+//
+// poly_lo = r * poly_lo
+//
+(p0) add GR_Perturb = 0x1, r0 ;;
+(p0) sub GR_ScaleN = GR_Bias, GR_N
+}
+{ .mii
+(p0) setf.sig FR_float_N = GR_N
+ nop.i 999 ;;
+//
+// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
+// Load h1
+// S_lo = S_lo + BB
+// Branch for -80 > M
+//
+(p0) add GR_Index2 = GR_Index2, GR_Table_Base1
+}
+{ .mmi
+(p0) setf.exp FR_two_negN = GR_ScaleN
+ nop.m 999
+(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp ;;
+}
+//
+// Index2 points to Z2
+// Branch for -7 > M
+//
+{ .mmb
+(p0) ld4 GR_Z_2 = [GR_Index2],4
+(p0) ld8 GR_Table_Base = [GR_Table_Base]
+ nop.b 999 ;;
+}
+(p0) nop.i 999
+//
+// Load Z_2
+// N = N - Bias
+// Tablebase points to Table3
+//
+{ .mmi
+(p0) ldfs FR_G_tmp = [GR_Index2],4 ;;
+//
+// Load G_2
+// pmpyshr2 X_2= (X_1,Z_2,15)
+// float_N = setf.sig(N)
+// ScaleN = Bias - N
+//
+(p0) ldfs FR_H_tmp = [GR_Index2],8
+ nop.i 999 ;;
+}
+//
+// Load H_2
+// two_negN = setf.exp(scaleN)
+// G = G_1 * G_2
+//
+{ .mfi
+(p0) ldfd FR_h_tmp = [GR_Index2],0
+ nop.f 999
+(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;;
+}
+{ .mii
+ nop.m 999
+(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;;
+//
+// Load h_2
+// H = H_1 + H_2
+// h = h_1 + h_2
+// Index3 = extr.u(X_2,1,5)
+//
+(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+//
+// float_N = fcvt.xf(float_N)
+// load G3
+//
+(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;;
+}
+{ .mmi
+ nop.m 999
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.i 999
+};;
+
+{ .mfi
+(p0) ldfe FR_log2_hi = [GR_Table_Base],16
+(p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN
+ nop.i 999 ;;
+}
+{ .mmf
+ nop.m 999
+//
+// G = G3 * G
+// Load h3
+// Load log2_hi
+// H = H + H3
+//
+(p0) ldfe FR_log2_lo = [GR_Table_Base],16
+(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;;
+}
+{ .mmf
+(p0) ldfs FR_G_tmp = [GR_Index3],4
+//
+// h = h + h3
+// r = G * S_hi + 1
+// Load log2_lo
+//
+(p0) ldfe FR_Q4 = [GR_Table_Base],16
+(p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;;
+}
+{ .mfi
+(p0) ldfe FR_Q3 = [GR_Table_Base],16
+(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
+ nop.i 999 ;;
+}
+{ .mmf
+(p0) ldfs FR_H_tmp = [GR_Index3],4
+(p0) ldfe FR_Q2 = [GR_Table_Base],16
+//
+// Comput Index for Table3
+// S_lo = S_lo * two_negN
+//
+(p0) fcvt.xf FR_float_N = FR_float_N ;;
+}
+//
+// If S_lo == 0, set p8 false
+// Load H3
+// Load ptr to table of polynomial coeff.
+//
+{ .mmf
+(p0) ldfd FR_h_tmp = [GR_Index3],0
+(p0) ldfe FR_Q1 = [GR_Table_Base],0
+(p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_H = FR_H, FR_H_tmp
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fadd.s1 FR_h = FR_h, FR_h_tmp
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Load Q4
+// Load Q3
+// Load Q2
+// Load Q1
+//
+(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = r * Q4 + Q3
+// rsq = r* r
+//
+(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// If (S_lo!=0) r = s_lo * G + r
+//
+(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3
+ nop.i 999
+}
+//
+// Create a 0x00000....01
+// poly_lo = poly_lo * rsq + h
+//
+{ .mfi
+(p0) setf.sig FR_dummy = GR_Perturb
+(p0) fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// h = N * log2_lo + h
+// Y_hi = n * log2_hi + H
+//
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// poly_lo = r * poly_o + Q2
+// poly_hi = Q1 * rsq + r
+//
+(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo
+//
+// Create the FR for a binary "or"
+// Y_lo = poly_hi + poly_lo
+//
+// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+// Turn the lsb of Y_lo ON
+//
+// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
+//
+// Merge the new lsb into Y_lo, for alone doesn't
+//
+(p0) br.cond.sptk LOGL_main ;;
+}
+L(log1pl_near):
+{ .mmi
+ nop.m 999
+ nop.m 999
+// /*******************************************************/
+// /*********** Branch log1pl_near ************************/
+// /*******************************************************/
+(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;;
+}
+{ .mmi
+ nop.m 999
+ ld8 GR_Table_Base = [GR_Table_Base]
+ nop.i 999
+};;
+//
+// Load base address of poly. coeff.
+//
+{ .mmb
+(p0) add GR_Table_ptr = 0x40,GR_Table_Base
+//
+// Address tables with separate pointers
+//
+(p0) ldfe FR_P8 = [GR_Table_Base],16
+ nop.b 999 ;;
+}
+{ .mmb
+(p0) ldfe FR_P4 = [GR_Table_ptr],16
+//
+// Load P4
+// Load P8
+//
+(p0) ldfe FR_P7 = [GR_Table_Base],16
+ nop.b 999 ;;
+}
+{ .mmf
+(p0) ldfe FR_P3 = [GR_Table_ptr],16
+//
+// Load P3
+// Load P7
+//
+(p0) ldfe FR_P6 = [GR_Table_Base],16
+(p0) fmpy.s1 FR_wsq = FR_W, FR_W ;;
+}
+{ .mfi
+(p0) ldfe FR_P2 = [GR_Table_ptr],16
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3
+ nop.i 999
+}
+//
+// Load P2
+// Load P6
+// Wsq = w * w
+// Y_hi = p4 * w + p3
+//
+{ .mfi
+(p0) ldfe FR_P5 = [GR_Table_Base],16
+(p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfe FR_P1 = [GR_Table_ptr],16
+//
+// Load P1
+// Load P5
+// Y_lo = p8 * w + P7
+//
+(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6
+(p0) add GR_Perturb = 0x1, r0 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// w4 = w2 * w2
+// Y_hi = y_hi * w + p2
+// Y_lo = y_lo * w + p6
+// Create perturbation bit
+//
+(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1
+ nop.i 999
+}
+//
+// Y_hi = y_hi * w + p1
+// w6 = w4 * w2
+//
+{ .mfi
+(p0) setf.sig FR_Q4 = GR_Perturb
+(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_dummy = FR_wsq,FR_Y_hi, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 FR_Y_hi = FR_W,f1,f0
+ nop.i 999
+};;
+{ .mfb
+ nop.m 999
+//
+// Y_hi = w
+// Y_lo = y_lo * w + p5
+//
+(p0) fma.s1 FR_Y_lo = FR_w6, FR_Y_lo,FR_dummy
+//
+// Y_lo = y_lo * w6 + y_high order part.
+//
+// performance
+//
+(p0) br.cond.sptk LOGL_main ;;
+}
+L(log1pl_small):
+{ .mmi
+ nop.m 999
+// /*******************************************************/
+// /*********** Branch log1pl_small ***********************/
+// /*******************************************************/
+(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp
+}
+{ .mfi
+ nop.m 999
+(p0) mov FR_Em1 = FR_W
+(p0) cmp.eq.unc p7, p0 = r0, r0 ;;
+}
+{ .mlx
+ ld8 GR_Table_Base = [GR_Table_Base]
+(p0) movl GR_Expo_Range = 0x0000000000000004 ;;
+}
+//
+// Set Safe to true
+// Set Expo_Range = 0 for single
+// Set Expo_Range = 2 for double
+// Set Expo_Range = 4 for double-extended
+//
+{ .mmi
+(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;;
+(p0) ldfe FR_Threshold = [GR_Table_Base],16
+ nop.i 999
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Bias = 0x000000000000FF9B ;;
+}
+{ .mfi
+(p0) ldfe FR_Tiny = [GR_Table_Base],0
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fadd FR_SCALE = f0, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny
+(p12) cmp.ne.unc p7, p0 = r0, r0
+}
+{ .mfi
+(p12) setf.exp FR_SCALE = GR_Bias
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Set p7 to SAFE = FALSE
+// Set Scale = 2^-100
+//
+(p0) fma.s0 f8 = FR_Y_lo,FR_SCALE,FR_Y_hi
+(p0) br.ret.sptk b0 ;;
+}
+L(LOGL_64_one):
+{ .mfb
+ nop.m 999
+(p0) fmpy.s0 f8 = FR_Input_X, f0
+(p0) br.ret.sptk b0 ;;
+}
+//
+// Raise divide by zero for +/-0 input.
+//
+L(LOGL_64_zero):
+{ .mfi
+(p0) mov GR_Parameter_TAG = 0
+//
+// If we have logl(1), log10l(1) or log1pl(0), return 0.
+//
+(p0) fsub.s0 FR_Output_X_tmp = f0, f1
+ nop.i 999 ;;
+}
+{ .mii
+(p14) mov GR_Parameter_TAG = 6
+ nop.i 999 ;;
+(p15) mov GR_Parameter_TAG = 138 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0
+(p0) br.cond.sptk __libm_error_region ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Report that logl(0) computed
+// { .mfb
+(p0) mov FR_Input_X = FR_Output_X_tmp
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGL_64_special):
+{ .mfi
+ nop.m 999
+//
+// Return -Inf or value from handler.
+//
+(p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Check for Natval, QNan, SNaN, +Inf
+//
+(p7) fmpy.s0 f8 = FR_Input_X, f1
+//
+// For SNaN raise invalid and return QNaN.
+// For QNaN raise invalid and return QNaN.
+// For +Inf return +Inf.
+//
+(p7) br.ret.sptk b0 ;;
+}
+//
+// For -Inf raise invalid and return QNaN.
+//
+{ .mii
+(p0) mov GR_Parameter_TAG = 1
+ nop.i 999 ;;
+(p14) mov GR_Parameter_TAG = 7 ;;
+}
+{ .mfi
+(p15) mov GR_Parameter_TAG = 139
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p0) fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
+(p0) br.cond.sptk __libm_error_region ;;
+}
+//
+// Report that logl(-Inf) computed
+// Report that log10l(-Inf) computed
+// Report that log1p(-Inf) computed
+//
+{ .mfb
+ nop.m 0
+(p0) mov FR_Input_X = FR_Output_X_tmp
+(p0) br.ret.sptk b0 ;;
+}
+L(LOGL_64_unsupported):
+{ .mfb
+ nop.m 999
+//
+// Return generated NaN or other value .
+//
+(p0) fmpy.s0 f8 = FR_Input_X, f0
+(p0) br.ret.sptk b0 ;;
+}
+L(LOGL_64_negative):
+{ .mfi
+ nop.m 999
+//
+// Deal with x < 0 in a special way
+//
+(p0) frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
+//
+// Deal with x < 0 in a special way - raise
+// invalid and produce QNaN indefinite.
+//
+(p0) mov GR_Parameter_TAG = 1 ;;
+}
+{ .mii
+(p14) mov GR_Parameter_TAG = 7
+ nop.i 999 ;;
+(p15) mov GR_Parameter_TAG = 139
+}
+.endp log1pl
+ASM_SIZE_DIRECTIVE(log1pl)
+// Error exit: spill the arguments and tentative result into a 64-byte frame, call __libm_error_support, and return the stored result in f8.
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 slot: entry sp - 32, i.e. new sp + 32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate a 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2; post-increment leaves the pointer at sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (tentative result) at sp + 48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Reload the result slot (possibly updated by the callee) into f8
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+// LOGL_main: common exit reached by branch from the log paths; forms the final value in FR_Input_X from the pair (Y_hi, Y_lo) and returns.
+.proc LOGL_main
+LOGL_main:
+{ .mfi
+ nop.m 999
+//
+// kernel_log_64 computes ln(X + E); when p7 is set, FR_Input_X = Y_lo + Y_hi
+//
+(p7) fadd.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
+ nop.i 0
+}
+{ .mmi
+ nop.m 999
+ nop.m 999
+(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
+}
+{ .mmi
+ nop.m 999
+(p14) ld8 GR_Table_Base = [GR_Table_Base]
+ nop.i 999
+};;
+// p14 only: load a two-part constant C (hi at offset 0, lo at offset 16). NOTE(review): table name suggests 1/ln(10), i.e. the log10 case - confirm.
+{ .mmi
+(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;;
+(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi // tmp = Y_lo * C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp // tmp = Y_hi * C_lo + tmp
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p14) fma.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp // result = Y_hi * C_hi + tmp (overwrites the p7 sum)
+(p0) br.ret.sptk b0 ;;
+}
+.endp LOGL_main
+ASM_SIZE_DIRECTIVE(LOGL_main)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logb.S b/sysdeps/ia64/fpu/s_logb.S
new file mode 100644
index 0000000..d24f1f6
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_logb.S
@@ -0,0 +1,314 @@
+.file "logb.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/16/00 Modified to conform to C9X
+// 3/16/00 Improved speed
+// 4/04/00 Unwind support added
+// 5/30/00 Fixed bug when x double-extended denormal
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// API
+//==============================================================
+// double logb( double x);
+//
+// Overview of operation
+//==============================================================
+// The logb function extracts the exponent of x as an integer in
+// floating-point format.
+// For finite nonzero x, logb returns floor(log2(|x|)) as a double
+//
+// logb is similar to ilogb but differs in the following ways:
+// +-inf
+// ilogb: returns INT_MAX
+// logb: returns +inf
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogb: returns INT_MAX (7fffffff)
+// logb: returns QNAN (quietized SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogb: returns -INT_MAX (80000001)
+// logb: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
+// Registers used
+//==============================================================
+// general registers used:
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+//
+// predicate registers used:
+// p6, p7, p8
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
+
+#include "libm_support.h"
+
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+GR_SAVE_PFS = r32
+
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+
+.align 32
+.global logb#
+
+.section .text
+.proc logb#
+.align 32
+// logb: input x in f8, result in f8.  A normal finite x yields its unbiased
+// binary exponent as a double; unnormal, NaN/Inf and zero inputs branch away.
+logb:
+
+// fclass mask columns: qnan snan inf norm unorm 0 -+
+// 0x0b =                0    0    0   0    1    0 11
+// p8 <- x is unnormalized (denormal), either sign
+{ .mfi
+ alloc r32=ar.pfs,1,5,4,0
+(p0) fclass.m.unc p8,p0 = f8, 0x0b
+ nop.i 999
+}
+// X NORMAL
+// r37 = exp(f8) - 0xffff   (remove the exponent bias)
+// sig(f9) = r37
+// f8 = convert_to_fp (sig(f9)), rounded to double
+{ .mfi
+(p0) getf.exp r35 = f8
+(p0) fnorm f10=f8
+ nop.i 999 ;;
+}
+
+// fclass mask columns: qnan snan inf norm unorm 0 -+
+// 0xe3 =                1    1    1   0    0    0 11
+// p6 <- x is NaN or infinity, either sign
+{ .mmf
+(p0) mov r33 = 0xffff // exponent bias
+(p0) mov r34 = 0x1ffff // mask keeping the low 17 bits of the getf.exp value
+(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
+}
+// 0x07 = zero of either sign: p7 <- x is zero.  Unnormal inputs leave first via p8.
+{ .mfb
+(p0) and r36 = r35, r34
+(p0) fclass.m.unc p7,p0 = f8, 0x07
+(p8) br.cond.spnt L(LOGB_DENORM) ;;
+}
+
+{ .mib
+(p0) sub r37 = r36, r33
+ nop.i 999
+(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
+}
+
+{ .mib
+(p0) setf.sig f9 = r37
+ nop.i 999
+(p7) br.cond.spnt L(LOGB_ZERO) ;;
+}
+// Convert the signed integer exponent held in f9 to floating point
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf f10 = f9
+ nop.i 999 ;;
+}
+// Round the result to double precision and return it in f8
+{ .mfb
+ nop.m 999
+(p0) fnorm.d f8 = f10
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_DENORM):
+// Form signexp of 2^64 (0xffff + 64 = 0x1003f) in case we need to scale
+// Check whether the normalized copy in f10 is still a double-extended denormal
+{ .mfi
+(p0) mov r38 = 0x1003f
+(p0) fclass.m.unc p8,p0 = f10, 0x0b
+ nop.i 999 ;;
+}
+
+// Form 2^64 in case need to scale denormal
+{ .mfi
+(p0) setf.exp f11 = r38
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add r33 = 64, r33
+(p8) fmpy f10 = f10, f11
+ nop.i 999 ;;
+}
+
+// Logic is the same as normal path but use normalized input
+{ .mmi
+(p0) getf.exp r35 = f10 ;;
+ nop.m 999
+ nop.i 999 ;;
+}
+// Mask off everything above the exponent field, then subtract the (possibly adjusted) bias
+{ .mmi
+(p0) and r36 = r35, r34 ;;
+(p0) sub r37 = r36, r33
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) setf.sig f9 = r37
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf f10 = f9
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fnorm.d f8 = f10
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_NAN_INF):
+
+// X NAN or INFINITY, return f8 * f8 (+/-inf becomes +inf, a NaN comes back quiet)
+{ .mfb
+ nop.m 999
+(p0) fma.d f8= f8,f8,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp logb#
+ASM_SIZE_DIRECTIVE(logb)
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+// Zero-input path of logb, entered at L(LOGB_ZERO): computes 1/-|x| = -inf (raising
+// divide-by-zero), then reports error code 151 through __libm_error_support.
+.proc __libm_error_region
+__libm_error_region:
+L(LOGB_ZERO):
+.prologue
+
+// f9 = |f8|
+// f10 = -f9 = -|f8|
+// f9 = 1.0/f10 = 1.0/-|f8| = -inf   (f8 is +/-0 on this path)
+
+{ .mfi
+ mov r41 = 151 // Error code, passed in the fourth output register
+(p0) fmerge.s f9 = f0,f8
+ nop.i 999
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+ fmerge.ns f10 = f0,f9
+ nop.i 999
+}
+;;
+
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 slot: entry sp - 32, i.e. new sp + 32
+ frcpa f9,p6 = f1,f10
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (f0 = 0.0) on stack; pointer moves to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfd [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address after the call
+};;
+
+// (4)
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logbf.S b/sysdeps/ia64/fpu/s_logbf.S
new file mode 100644
index 0000000..d306847
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_logbf.S
@@ -0,0 +1,301 @@
+.file "logbf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/16/00 Modified to conform to C9X
+// 3/16/00 Improved speed
+// 4/04/00 Unwind support added
+// 5/30/00 Fixed bug when x double-extended denormal
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// API
+//==============================================================
+// float logbf( float x);
+//
+// Overview of operation
+//==============================================================
+// The logbf function extracts the exponent of x as an integer in
+// floating-point format.
+// For finite nonzero x, logbf returns floor(log2(|x|)) as a float
+
+// logbf is similar to ilogbf but differs in the following ways:
+// +-inf
+// ilogbf: returns INT_MAX
+// logbf: returns +inf
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogbf: returns INT_MAX (7fffffff)
+// logbf: returns QNAN (quietized SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogbf: returns -INT_MAX (80000001)
+// logbf: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
+// Registers used
+//==============================================================
+// general registers used:
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+//
+// predicate registers used:
+// p6, p7, p8
+//
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
+
+#include "libm_support.h"
+
+GR_SAVE_B0 = r34
+// (r40 is GR_Parameter_RESULT below; this file loads no table of coefficients)
+GR_SAVE_PFS = r32
+GR_SAVE_GP = r35
+
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG = r41
+
+FR_X = f8
+FR_Y = f0
+FR_RESULT = f10
+
+
+.align 32
+.global logbf#
+
+.section .text
+.proc logbf#
+.align 32
+// logbf: input x in f8, result in f8.  A normal finite x yields its unbiased
+// binary exponent as a float; unnormal, NaN/Inf and zero inputs branch away.
+logbf:
+
+// fclass mask columns: qnan snan inf norm unorm 0 -+
+// 0x0b =                0    0    0   0    1    0 11
+// p8 <- x is unnormalized (denormal), either sign
+{ .mfi
+ alloc r32=ar.pfs,1,5,4,0
+(p0) fclass.m.unc p8,p0 = f8, 0x0b
+ nop.i 999
+}
+// X NORMAL
+// r37 = exp(f8) - 0xffff   (remove the exponent bias)
+// sig(f9) = r37
+// f8 = convert_to_fp (sig(f9)), rounded to single
+{ .mfi
+(p0) getf.exp r35 = f8
+(p0) fnorm f10=f8
+ nop.i 999 ;;
+}
+
+// fclass mask columns: qnan snan inf norm unorm 0 -+
+// 0xe3 =                1    1    1   0    0    0 11
+// p6 <- x is NaN or infinity, either sign
+{ .mmf
+(p0) mov r33 = 0xffff // exponent bias
+(p0) mov r34 = 0x1ffff // mask keeping the low 17 bits of the getf.exp value
+(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
+}
+// 0x07 = zero of either sign: p7 <- x is zero.  Unnormal inputs leave first via p8.
+{ .mfb
+(p0) and r36 = r35, r34
+(p0) fclass.m.unc p7,p0 = f8, 0x07
+(p8) br.cond.spnt L(LOGB_DENORM) ;;
+}
+
+{ .mib
+(p0) sub r37 = r36, r33
+ nop.i 999
+(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
+}
+
+{ .mib
+(p0) setf.sig f9 = r37
+ nop.i 999
+(p7) br.cond.spnt L(LOGB_ZERO) ;;
+}
+// Convert the signed integer exponent held in f9 to floating point
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf f10 = f9
+ nop.i 999 ;;
+}
+// Round the result to single precision and return it in f8
+{ .mfb
+ nop.m 999
+(p0) fnorm.s f8 = f10
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_DENORM):
+// Form signexp of 2^64 (0xffff + 64 = 0x1003f) in case we need to scale
+// Check whether the normalized copy in f10 is still a double-extended denormal
+{ .mfi
+(p0) mov r38 = 0x1003f
+(p0) fclass.m.unc p8,p0 = f10, 0x0b
+ nop.i 999 ;;
+}
+
+// Form 2^64 in case need to scale denormal
+{ .mfi
+(p0) setf.exp f11 = r38
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add r33 = 64, r33
+(p8) fmpy f10 = f10, f11
+ nop.i 999 ;;
+}
+
+// Logic is the same as normal path but use normalized input
+{ .mmi
+(p0) getf.exp r35 = f10 ;;
+ nop.m 999
+ nop.i 999 ;;
+}
+// Mask off everything above the exponent field, then subtract the (possibly adjusted) bias
+{ .mmi
+(p0) and r36 = r35, r34 ;;
+(p0) sub r37 = r36, r33
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) setf.sig f9 = r37
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf f10 = f9
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fnorm.s f8 = f10
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_NAN_INF):
+
+// X NAN or INFINITY, return f8 * f8 (+/-inf becomes +inf, a NaN comes back quiet)
+{ .mfb
+ nop.m 999
+(p0) fma.s f8= f8,f8,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_ZERO):
+
+// X ZERO
+// Form f10 = 1.0/-|f8| = -inf (sets divide-by-zero flag), then fall through to error support
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f9 = f0,f8
+ nop.i 999 ;;
+}
+// f10 = -f9 = -|f8|
+{ .mfi
+ nop.m 999
+(p0) fmerge.ns f10 = f0,f9
+ nop.i 999 ;;
+}
+// f10 (FR_RESULT) = 1.0/f10; no branch follows, execution continues into __libm_error_region
+{ .mfi
+ nop.m 999
+(p0) frcpa f10,p6 = f1,f10
+ nop.i 999 ;;
+}
+
+.endp logbf
+ASM_SIZE_DIRECTIVE(logbf)
+
+// Error exit for logbf(0), entered by fall-through: stores FR_Y/FR_X/FR_RESULT as floats, calls __libm_error_support with tag 152, returns the stored result in f8.
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mii
+ add GR_Parameter_Y=-32,sp // Parameter 2 slot: entry sp - 32, i.e. new sp + 32
+(p0) mov GR_Parameter_TAG = 152 // Error tag handed to the support routine
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate a 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 (FR_Y is f0); pointer moves to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (tentative result) at sp + 48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logbl.S b/sysdeps/ia64/fpu/s_logbl.S
new file mode 100644
index 0000000..e8275b2
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_logbl.S
@@ -0,0 +1,286 @@
+.file "logbl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 2/16/00 Modified to conform to C9X
+// 3/16/00 Improved speed
+// 4/04/00 Unwind support added
+// 5/30/00 Fixed bug when x double-extended denormal
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+//
+// API
+//==============================================================
+// long double logbl( long double x);
+//
+// Overview of operation
+//==============================================================
+// The logbl function extracts the exponent of x as an integer in
+// floating-point format.
+// For finite nonzero x, logbl returns floor(log2(|x|)) as a long double
+//
+// logbl is similar to ilogbl but differs in the following ways:
+// +-inf
+// ilogbl: returns INT_MAX
+// logbl: returns +inf
+// Nan returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogbl: returns INT_MAX (7fffffff)
+// logbl: returns QNAN (quietized SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogbl: returns -INT_MAX (80000001)
+// logbl: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
+// Registers used
+//==============================================================
+// general registers used:
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+//
+// predicate registers used:
+// p6, p7, p8
+//
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
+
+#include "libm_support.h"
+
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG = r41
+
+FR_X = f8
+FR_Y = f0
+FR_RESULT = f10
+
+.align 32
+.global logbl#
+
+.section .text
+.proc logbl#
+.align 32
+// logbl: input x in f8, result in f8.  A normal finite x yields its unbiased
+// binary exponent as a long double; unnormal, NaN/Inf and zero inputs branch away.
+logbl:
+
+// fclass mask columns: qnan snan inf norm unorm 0 -+
+// 0x0b =                0    0    0   0    1    0 11
+// p8 <- x is unnormalized (denormal), either sign
+{ .mfi
+ alloc r32=ar.pfs,1,5,4,0
+(p0) fclass.m.unc p8,p0 = f8, 0x0b
+ nop.i 999
+}
+// X NORMAL
+// r37 = exp(f8) - 0xffff   (remove the exponent bias)
+// sig(f9) = r37
+// f8 = convert_to_fp (sig(f9))
+{ .mfi
+(p0) getf.exp r35 = f8
+(p0) fnorm f10=f8
+ nop.i 999 ;;
+}
+
+// fclass mask columns: qnan snan inf norm unorm 0 -+
+// 0xe3 =                1    1    1   0    0    0 11
+// p6 <- x is NaN or infinity, either sign
+{ .mmf
+(p0) mov r33 = 0xffff // exponent bias
+(p0) mov r34 = 0x1ffff // mask keeping the low 17 bits of the getf.exp value
+(p0) fclass.m.unc p6,p0 = f8, 0xe3 ;;
+}
+// 0x07 = zero of either sign: p7 <- x is zero.  Unnormal inputs leave first via p8.
+{ .mfb
+(p0) and r36 = r35, r34
+(p0) fclass.m.unc p7,p0 = f8, 0x07
+(p8) br.cond.spnt L(LOGB_DENORM) ;;
+}
+
+{ .mib
+(p0) sub r37 = r36, r33
+ nop.i 999
+(p6) br.cond.spnt L(LOGB_NAN_INF) ;;
+}
+
+{ .mib
+(p0) setf.sig f9 = r37
+ nop.i 999
+(p7) br.cond.spnt L(LOGB_ZERO) ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf f10 = f9
+ nop.i 999 ;;
+}
+// Normalize the converted exponent and return it in f8
+{ .mfb
+ nop.m 999
+(p0) fnorm f8 = f10
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_DENORM):
+// Form signexp of 2^64 (0xffff + 64 = 0x1003f) in case we need to scale
+// Check whether the normalized copy in f10 is still a double-extended denormal
+{ .mfi
+(p0) mov r38 = 0x1003f
+(p0) fclass.m.unc p8,p0 = f10, 0x0b
+ nop.i 999 ;;
+}
+
+// Form 2^64 in case need to scale denormal
+{ .mfi
+(p0) setf.exp f11 = r38
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add r33 = 64, r33
+(p8) fmpy f10 = f10, f11
+ nop.i 999 ;;
+}
+
+// Logic is the same as normal path but use normalized input
+{ .mmi
+(p0) getf.exp r35 = f10 ;;
+ nop.m 999
+ nop.i 999 ;;
+}
+// Mask off everything above the exponent field, then subtract the (possibly adjusted) bias
+{ .mmi
+(p0) and r36 = r35, r34 ;;
+(p0) sub r37 = r36, r33
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p0) setf.sig f9 = r37
+ nop.m 999
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf f10 = f9
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fnorm f8 = f10
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_NAN_INF):
+
+// X NAN or INFINITY, return f8 * f8 (+/-inf becomes +inf, a NaN comes back quiet)
+{ .mfb
+ nop.m 999
+(p0) fma f8= f8,f8,f0
+(p0) br.ret.sptk b0 ;;
+}
+
+L(LOGB_ZERO):
+{.mfi
+ nop.m 0
+(p0) frcpa.s0 f10,p6 = f1,f0 // f10 = 1/0 = +inf, setting the divide-by-zero flag in s0
+ nop.i 0
+};;
+{.mfi
+ mov GR_Parameter_TAG = 150 // Error tag handed to the support routine
+(p0) fms.s1 f10 = f0,f0,f10 // f10 = 0*0 - (+inf) = -inf
+ nop.i 0
+};;
+// X ZERO: f10 (FR_RESULT) now holds -inf and the divide-by-zero flag is set;
+// no branch follows, so execution continues into __libm_error_region to call error support
+.endp logbl
+ASM_SIZE_DIRECTIVE(logbl)
+// Error exit for logbl(0), entered by fall-through: spills FR_Y/FR_X/FR_RESULT to a 64-byte frame, calls __libm_error_support, returns the stored result in f8.
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 slot: entry sp - 32, i.e. new sp + 32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate a 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 (FR_Y is f0); pointer moves to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (tentative result) at sp + 48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Step back to the Parameter 2 slot (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-derive the result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_matherrf.c b/sysdeps/ia64/fpu/s_matherrf.c
new file mode 100644
index 0000000..4b3033e
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_matherrf.c
@@ -0,0 +1,33 @@
+/* Derived from: */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+
+#include "math.h"
+#include "math_private.h"
+#include "libm_support.h"
+
+#ifdef __STDC__
+int weak_function
+__matherrf (struct exceptionf *x)
+#else
+int weak_function
+__matherrf (x)
+     struct exceptionf *x;
+#endif
+{
+  /* Default handler: performs no recovery and returns 0 for every input.  */
+  int status = 0;
+  if (x->arg1 != x->arg1)   /* true only when arg1 is a NaN */
+    return 0;
+  return status;
+}
+weak_alias (__matherrf, matherrf)
diff --git a/sysdeps/ia64/fpu/s_matherrl.c b/sysdeps/ia64/fpu/s_matherrl.c
new file mode 100644
index 0000000..751cc6b
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_matherrl.c
@@ -0,0 +1,33 @@
+/* Derived from: */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+
+#include "math.h"
+#include "math_private.h"
+#include "libm_support.h"
+
+#ifdef __STDC__
+int weak_function
+__matherrl (struct exceptionl *x)
+#else
+int weak_function
+__matherrl (x)
+     struct exceptionl *x;
+#endif
+{
+  /* Default handler: performs no recovery and returns 0 for every input.  */
+  int status = 0;
+  if (x->arg1 != x->arg1)   /* true only when arg1 is a NaN */
+    return 0;
+  return status;
+}
+weak_alias (__matherrl, matherrl)
diff --git a/sysdeps/ia64/fpu/s_modf.S b/sysdeps/ia64/fpu/s_modf.S
new file mode 100644
index 0000000..0bfad13
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_modf.S
@@ -0,0 +1,272 @@
+.file "modf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00: Improved speed, corrected result for NaN input
+// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
+// qnans nor for inputs larger than 2^63.
+//
+// API
+//==============================================================
+// double modf(double x, double *iptr)
+// break a floating-point number x into fractional and integral parts
+//
+// input floating point f8, address in r33
+// output floating point f8 (x fraction), and *iptr (x integral part)
+//
+// OVERVIEW
+//==============================================================
+//
+// NO FRACTIONAL PART: HUGE
+// If
+// for double-extended
+// If the true exponent is greater than or equal 63
+// 1003e ==> 1003e -ffff = 3f = 63(dec)
+// for double
+// If the true exponent is greater than or equal 52
+// 10033 -ffff = 34 = 52(dec)
+// for single
+// If the true exponent is greater than or equal 23
+// 10016 -ffff = 17 = 23(dec)
+// then
+// we are already an integer (p9 true)
+
+// NO INTEGER PART: SMALL
+// Is f8 exponent less than register bias (that is, is it
+// less than 1). If it is, get the right sign of
+// zero and store this in iptr.
+
+// CALCULATION: NOT HUGE, NOT SMALL
+// To get the integer part
+// Take the floating-point input and truncate
+// then convert this integer to fp Call it MODF_INTEGER_PART
+
+// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
+// Then put fraction part in f8
+// put integer part MODF_INTEGER_PART into *iptr
+
+// Registers used
+//==============================================================
+
+// predicate registers used:
+// p6 - p13
+
+// 0xFFFF 0x10033
+// -----------------------+-----------------+-------------
+// SMALL | NORMAL | HUGE
+// p11 --------------->|<----- p12 ----->| <-------------- p9
+// p10 --------------------------------->|
+// p13 --------------------------------------------------->|
+//
+
+#include "libm_support.h"
+
+// floating-point registers used:
+MODF_NORM_F8 = f9 // normalized copy of the input f8
+MODF_FRACTION_PART = f10 // not referenced by the code in this file
+MODF_INTEGER_PART = f11 // integer part as a floating-point value
+MODF_INT_INTEGER_PART = f12 // result of fcvt.fx.trunc on the input
+
+
+// general registers used
+modf_signexp = r14 // value read by getf.exp
+modf_GR_no_frac = r15 // 0x10033: biased exponent from which the input has no fraction part
+modf_GR_FFFF = r16 // 0xffff: exponent bias
+modf_17_ones = r17 // 0x1ffff: mask used to extract the exponent
+modf_exp = r18 // modf_signexp masked with modf_17_ones
+// r33 = iptr
+
+
+.align 32
+.global modf#
+
+.section .text
+.proc modf#
+.align 32
+
+
+// Main path is p9, p11, p8 FALSE and p12 TRUE
+
+// Assume input is normalized and get signexp
+// Normalize input just in case
+// Form exponent bias
+modf:
+{ .mfi
+ getf.exp modf_signexp = f8
+ fnorm MODF_NORM_F8 = f8
+ addl modf_GR_FFFF = 0xffff, r0
+}
+// Get integer part of input
+// Form exponent mask
+{ .mfi
+ nop.m 999
+ fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = f8
+ mov modf_17_ones = 0x1ffff ;;
+}
+
+// Is x nan or inf?
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11 = 0xe3 NAN_INF
+// Form biased exponent where input only has an integer part
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p13 = f8, 0xe3
+ addl modf_GR_no_frac = 0x10033, r0 ;;
+}
+
+// Mask to get exponent
+// Is x unnorm?
+// qnan snan inf norm unorm 0 -+
+// 0 0 0 0 1 0 11 = 0x0b UNORM
+// (p13 for the calculation path, else p6 if nan or inf, come from the fclass above)
+{ .mfi
+ and modf_exp = modf_17_ones, modf_signexp
+ fclass.m.unc p8,p0 = f8, 0x0b
+ nop.i 999 ;;
+}
+
+// p11 <== SMALL, no integer part, fraction is everything
+// p9 <== HUGE, no fraction part, integer is everything
+// p12 <== NORMAL, fraction part and integer part
+{ .mii
+(p13) cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
+ nop.i 999
+ nop.i 999 ;;
+}
+
+// Is x inf? p6 if inf, p7 if nan
+{ .mfb
+(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
+(p6) fclass.m.unc p6,p7 = f8, 0x23
+(p8) br.cond.spnt L(MODF_DENORM) ;;
+}
+
+L(MODF_COMMON):
+// For HUGE set fraction to signed 0
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8,f0
+ nop.i 999
+}
+// For HUGE set integer part to normalized input
+{ .mfi
+ nop.m 999
+(p9) fnorm.d MODF_INTEGER_PART = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// For SMALL set fraction to normalized input, integer part to signed 0
+{ .mfi
+ nop.m 999
+(p11) fmerge.s MODF_INTEGER_PART = f8,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fnorm.d f8 = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// For NORMAL float the integer part
+{ .mfi
+ nop.m 999
+(p12) fcvt.xf MODF_INTEGER_PART = MODF_INT_INTEGER_PART
+ nop.i 999 ;;
+}
+
+// If x inf set integer part to INF, fraction to signed 0
+{ .mfi
+(p6) stfd [r33] = MODF_NORM_F8
+(p6) fmerge.s f8 = f8,f0
+ nop.i 999 ;;
+}
+
+// If x nan set integer and fraction parts to NaN (quietized)
+{ .mfi
+(p7) stfd [r33] = MODF_NORM_F8
+(p7) fmerge.s f8 = MODF_NORM_F8, MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p9) stfd [r33] = MODF_INTEGER_PART // HUGE: store integer part to *iptr
+ nop.m 999
+ nop.i 999 ;;
+}
+
+// For SMALL store integer part (signed 0); for NORMAL compute fraction part
+{ .mfi
+(p11) stfd [r33] = MODF_INTEGER_PART
+(p12) fms.d.s0 f8 = MODF_NORM_F8,f1, MODF_INTEGER_PART
+ nop.i 999 ;;
+}
+
+// For NORMAL test if fraction part is zero; if so append correct sign
+{ .mfi
+ nop.m 999
+(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p12) stfd [r33] = MODF_INTEGER_PART // NORMAL: store integer part to *iptr
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// For NORMAL if fraction part is zero append sign of input
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = MODF_NORM_F8, f0
+ br.ret.sptk b0 ;;
+}
+
+L(MODF_DENORM):
+// If x unorm get signexp from normalized input
+// If x unorm get integer part from normalized input
+{ .mfi
+ getf.exp modf_signexp = MODF_NORM_F8
+ fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// If x unorm mask to get exponent, then redo the SMALL test
+{ .mmi
+ and modf_exp = modf_17_ones, modf_signexp ;;
+ cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
+ nop.i 999 ;;
+}
+
+{ .mfb
+(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
+ nop.f 999
+ br.cond.spnt L(MODF_COMMON) ;;
+}
+
+.endp modf
+ASM_SIZE_DIRECTIVE(modf)
diff --git a/sysdeps/ia64/fpu/s_modff.S b/sysdeps/ia64/fpu/s_modff.S
new file mode 100644
index 0000000..e56a07c
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_modff.S
@@ -0,0 +1,272 @@
+.file "modff.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00: Improved speed, corrected result for NaN input
+// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
+// qnans nor for inputs larger than 2^63.
+//
+// API
+//==============================================================
+// float modff(float x, float *iptr)
+// break a floating-point number x into fractional and integral parts
+//
+// input floating point f8, address in r33
+// output floating point f8 (x fraction), and *iptr (x integral part)
+//
+// OVERVIEW
+//==============================================================
+
+// NO FRACTIONAL PART: HUGE
+// If
+// for double-extended
+// If the true exponent is greater than or equal 63
+// 1003e ==> 1003e -ffff = 3f = 63(dec)
+// for double
+// If the true exponent is greater than or equal 52
+// 10033 -ffff = 34 = 52(dec)
+// for single
+// If the true exponent is greater than or equal 23
+// 10016 -ffff = 17 = 23(dec)
+// then
+// we are already an integer (p9 true)
+
+// NO INTEGER PART: SMALL
+// Is f8 exponent less than register bias (that is, is it
+// less than 1). If it is, get the right sign of
+// zero and store this in iptr.
+
+// CALCULATION: NOT HUGE, NOT SMALL
+// To get the integer part
+// Take the floating-point input and truncate
+// then convert this integer to fp Call it MODF_INTEGER_PART
+
+// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
+// Then put fraction part in f8
+// put integer part MODF_INTEGER_PART into *iptr
+
+// Registers used
+//==============================================================
+
+// predicate registers used:
+// p6 - p13
+
+// 0xFFFF 0x10016
+// -----------------------+-----------------+-------------
+// SMALL | NORMAL | HUGE
+// p11 --------------->|<----- p12 ----->| <-------------- p9
+// p10 --------------------------------->|
+// p13 --------------------------------------------------->|
+//
+
+#include "libm_support.h"
+
+// floating-point registers used:
+MODF_NORM_F8 = f9 // normalized copy of the input f8
+MODF_FRACTION_PART = f10 // not referenced by the code in this file
+MODF_INTEGER_PART = f11 // integer part as a floating-point value
+MODF_INT_INTEGER_PART = f12 // result of fcvt.fx.trunc on the input
+
+
+// general registers used
+modf_signexp = r14 // value read by getf.exp
+modf_GR_no_frac = r15 // 0x10016: biased exponent from which the input has no fraction part
+modf_GR_FFFF = r16 // 0xffff: exponent bias
+modf_17_ones = r17 // 0x1ffff: mask used to extract the exponent
+modf_exp = r18 // modf_signexp masked with modf_17_ones
+// r33 = iptr
+
+
+.align 32
+.global modff#
+
+.section .text
+.proc modff#
+.align 32
+
+
+// Main path is p9, p11, p8 FALSE and p12 TRUE
+
+// Assume input is normalized and get signexp
+// Normalize input just in case
+// Form exponent bias
+modff:
+{ .mfi
+ getf.exp modf_signexp = f8
+ fnorm MODF_NORM_F8 = f8
+ addl modf_GR_FFFF = 0xffff, r0
+}
+// Get integer part of input
+// Form exponent mask
+{ .mfi
+ nop.m 999
+ fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = f8
+ mov modf_17_ones = 0x1ffff ;;
+}
+
+// Is x nan or inf?
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11 = 0xe3 NAN_INF
+// Form biased exponent where input only has an integer part
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p13 = f8, 0xe3
+ addl modf_GR_no_frac = 0x10016, r0 ;;
+}
+
+// Mask to get exponent
+// Is x unnorm?
+// qnan snan inf norm unorm 0 -+
+// 0 0 0 0 1 0 11 = 0x0b UNORM
+// (p13 for the calculation path, else p6 if nan or inf, come from the fclass above)
+{ .mfi
+ and modf_exp = modf_17_ones, modf_signexp
+ fclass.m.unc p8,p0 = f8, 0x0b
+ nop.i 999 ;;
+}
+
+// p11 <== SMALL, no integer part, fraction is everything
+// p9 <== HUGE, no fraction part, integer is everything
+// p12 <== NORMAL, fraction part and integer part
+{ .mii
+(p13) cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
+ nop.i 999
+ nop.i 999 ;;
+}
+
+// Is x inf? p6 if inf, p7 if nan
+{ .mfb
+(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
+(p6) fclass.m.unc p6,p7 = f8, 0x23
+(p8) br.cond.spnt L(MODF_DENORM) ;;
+}
+
+L(MODF_COMMON):
+// For HUGE set fraction to signed 0
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8,f0
+ nop.i 999
+}
+// For HUGE set integer part to normalized input
+{ .mfi
+ nop.m 999
+(p9) fnorm.s MODF_INTEGER_PART = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// For SMALL set fraction to normalized input, integer part to signed 0
+{ .mfi
+ nop.m 999
+(p11) fmerge.s MODF_INTEGER_PART = f8,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fnorm.s f8 = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// For NORMAL float the integer part
+{ .mfi
+ nop.m 999
+(p12) fcvt.xf MODF_INTEGER_PART = MODF_INT_INTEGER_PART
+ nop.i 999 ;;
+}
+
+// If x inf set integer part to INF, fraction to signed 0
+{ .mfi
+(p6) stfs [r33] = MODF_NORM_F8
+(p6) fmerge.s f8 = f8,f0
+ nop.i 999 ;;
+}
+
+// If x nan set integer and fraction parts to NaN (quietized)
+{ .mfi
+(p7) stfs [r33] = MODF_NORM_F8
+(p7) fmerge.s f8 = MODF_NORM_F8, MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p9) stfs [r33] = MODF_INTEGER_PART // HUGE: store integer part to *iptr
+ nop.m 999
+ nop.i 999 ;;
+}
+
+// For SMALL store integer part (signed 0); for NORMAL compute fraction part
+{ .mfi
+(p11) stfs [r33] = MODF_INTEGER_PART
+(p12) fms.s.s0 f8 = MODF_NORM_F8,f1, MODF_INTEGER_PART
+ nop.i 999 ;;
+}
+
+// For NORMAL test if fraction part is zero; if so append correct sign
+{ .mfi
+ nop.m 999
+(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p12) stfs [r33] = MODF_INTEGER_PART // NORMAL: store integer part to *iptr
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// For NORMAL if fraction part is zero append sign of input
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = MODF_NORM_F8, f0
+ br.ret.sptk b0 ;;
+}
+
+L(MODF_DENORM):
+// If x unorm get signexp from normalized input
+// If x unorm get integer part from normalized input
+{ .mfi
+ getf.exp modf_signexp = MODF_NORM_F8
+ fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// If x unorm mask to get exponent, then redo the SMALL test
+{ .mmi
+ and modf_exp = modf_17_ones, modf_signexp ;;
+ cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
+ nop.i 999 ;;
+}
+
+{ .mfb
+(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
+ nop.f 999
+ br.cond.spnt L(MODF_COMMON) ;;
+}
+
+.endp modff
+ASM_SIZE_DIRECTIVE(modff)
diff --git a/sysdeps/ia64/fpu/s_modfl.S b/sysdeps/ia64/fpu/s_modfl.S
new file mode 100644
index 0000000..e15508b
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_modfl.S
@@ -0,0 +1,267 @@
+.file "modfl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00: Improved speed, corrected result for NaN input
+// 5/30/00 Fixed bug for exponent 0x1003e
+// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for
+// qnans nor for inputs larger than 2^63.
+//
+// API
+//==============================================================
+// long double modfl(long double x, long double *iptr)
+// break a floating-point number x into fractional and integral parts
+//
+// input floating point f8, address in r34
+// output floating point f8 (x fraction), and *iptr (x integral part)
+//
+// OVERVIEW
+//==============================================================
+//
+// NO FRACTIONAL PART: HUGE
+// If
+// for double-extended
+// If the true exponent is >= 63
+// 1003e ==> 1003e -ffff = 3f = 63(dec)
+// then
+// we are already an integer (p9 true)
+
+// NO INTEGER PART: SMALL
+// Is f8 exponent less than register bias (that is, is it
+// less than 1). If it is, get the right sign of
+// zero and store this in iptr.
+
+// CALCULATION: NOT HUGE, NOT SMALL
+// To get the integer part
+// Take the floating-point input and truncate
+// then convert this integer to fp Call it MODF_INTEGER_PART
+
+// Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part
+// Then put fraction part in f8
+// put integer part MODF_INTEGER_PART into *iptr
+
+// Registers used
+//==============================================================
+
+// predicate registers used:
+// p6 - p13
+
+// 0xFFFF 0x1003e
+// -----------------------+-----------------+-------------
+// SMALL | NORMAL | HUGE
+// p11 --------------->|<----- p12 ----->| <-------------- p9
+// p10 --------------------------------->|
+// p13 --------------------------------------------------->|
+//
+
+#include "libm_support.h"
+
+// floating-point registers used:
+MODF_NORM_F8 = f9 // normalized copy of the input f8
+MODF_FRACTION_PART = f10 // not referenced by the code in this file
+MODF_INTEGER_PART = f11 // integer part as a floating-point value
+MODF_INT_INTEGER_PART = f12 // result of fcvt.fx.trunc on the input
+
+
+// general registers used
+modf_signexp = r14 // value read by getf.exp
+modf_GR_no_frac = r15 // 0x1003e: biased exponent from which the input has no fraction part
+modf_GR_FFFF = r16 // 0xffff: exponent bias
+modf_17_ones = r17 // 0x1ffff: mask used to extract the exponent
+modf_exp = r18 // modf_signexp masked with modf_17_ones
+// r34 = iptr
+
+
+.align 32
+.global modfl#
+
+.section .text
+.proc modfl#
+.align 32
+
+
+// Main path is p9, p11, p8 FALSE and p12 TRUE
+
+// Assume input is normalized and get signexp
+// Normalize input just in case
+// Form exponent bias
+modfl:
+{ .mfi
+ getf.exp modf_signexp = f8
+ fnorm MODF_NORM_F8 = f8
+ addl modf_GR_FFFF = 0xffff, r0
+}
+// Get integer part of input
+// Form exponent mask
+{ .mfi
+ nop.m 999
+ fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = f8
+ mov modf_17_ones = 0x1ffff ;;
+}
+
+// Is x nan or inf?
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11 = 0xe3 NAN_INF
+// Form biased exponent where input only has an integer part
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p13 = f8, 0xe3
+ addl modf_GR_no_frac = 0x1003e, r0 ;;
+}
+
+// Mask to get exponent
+// Is x unnorm?
+// qnan snan inf norm unorm 0 -+
+// 0 0 0 0 1 0 11 = 0x0b UNORM
+// (p13 for the calculation path, else p6 if nan or inf, come from the fclass above)
+{ .mfi
+ and modf_exp = modf_17_ones, modf_signexp
+ fclass.m.unc p8,p0 = f8, 0x0b
+ nop.i 999 ;;
+}
+
+// p11 <== SMALL, no integer part, fraction is everything
+// p9 <== HUGE, no fraction part, integer is everything
+// p12 <== NORMAL, fraction part and integer part
+{ .mii
+(p13) cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
+ nop.i 999
+ nop.i 999 ;;
+}
+
+// Is x inf? p6 if inf, p7 if nan
+{ .mfb
+(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
+(p6) fclass.m.unc p6,p7 = f8, 0x23
+(p8) br.cond.spnt L(MODF_DENORM) ;;
+}
+
+L(MODF_COMMON):
+// For HUGE set fraction to signed 0
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8,f0
+ nop.i 999
+}
+// For HUGE set integer part to normalized input
+{ .mfi
+ nop.m 999
+(p9) fnorm MODF_INTEGER_PART = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// For SMALL set fraction to normalized input, integer part to signed 0
+{ .mfi
+ nop.m 999
+(p11) fmerge.s MODF_INTEGER_PART = f8,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fnorm f8 = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// For NORMAL float the integer part
+{ .mfi
+ nop.m 999
+(p12) fcvt.xf MODF_INTEGER_PART = MODF_INT_INTEGER_PART
+ nop.i 999 ;;
+}
+
+// If x inf set integer part to INF, fraction to signed 0
+{ .mfi
+(p6) stfe [r34] = MODF_NORM_F8
+(p6) fmerge.s f8 = f8,f0
+ nop.i 999 ;;
+}
+
+// If x nan set integer and fraction parts to NaN (quietized)
+{ .mfi
+(p7) stfe [r34] = MODF_NORM_F8
+(p7) fmerge.s f8 = MODF_NORM_F8, MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+{ .mmi
+(p9) stfe [r34] = MODF_INTEGER_PART // HUGE: store integer part to *iptr
+ nop.m 999
+ nop.i 999 ;;
+}
+
+// For SMALL store integer part (signed 0); for NORMAL compute fraction part
+{ .mfi
+(p11) stfe [r34] = MODF_INTEGER_PART
+(p12) fms.s0 f8 = MODF_NORM_F8,f1, MODF_INTEGER_PART
+ nop.i 999 ;;
+}
+
+// For NORMAL test if fraction part is zero; if so append correct sign
+{ .mfi
+ nop.m 999
+(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+ nop.i 999 ;;
+}
+
+{ .mfi
+(p12) stfe [r34] = MODF_INTEGER_PART // NORMAL: store integer part to *iptr
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// For NORMAL if fraction part is zero append sign of input
+{ .mfb
+ nop.m 999
+(p7) fmerge.s f8 = MODF_NORM_F8, f0
+ br.ret.sptk b0 ;;
+}
+
+L(MODF_DENORM):
+// If x unorm get signexp from normalized input
+// If x unorm get integer part from normalized input
+{ .mfi
+ getf.exp modf_signexp = MODF_NORM_F8
+ fcvt.fx.trunc.s1 MODF_INT_INTEGER_PART = MODF_NORM_F8
+ nop.i 999 ;;
+}
+
+// If x unorm mask to get exponent, then redo the SMALL test
+{ .mmi
+ and modf_exp = modf_17_ones, modf_signexp ;;
+ cmp.lt.unc p11,p10 = modf_exp, modf_GR_FFFF
+ nop.i 999 ;;
+}
+
+{ .mfb
+(p10) cmp.ge.unc p9,p12 = modf_exp, modf_GR_no_frac
+ nop.f 999
+ br.cond.spnt L(MODF_COMMON) ;;
+}
+
+.endp modfl
+ASM_SIZE_DIRECTIVE(modfl)
diff --git a/sysdeps/ia64/fpu/s_nearbyint.S b/sysdeps/ia64/fpu/s_nearbyint.S
new file mode 100644
index 0000000..8c7e4a9
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_nearbyint.S
@@ -0,0 +1,221 @@
+.file "nearbyint.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 10/19/2000: Created
+// 2/08/01 Corrected behavior for all rounding modes.
+//==============================================================
+//
+// API
+//==============================================================
+// double nearbyint(double x)
+
+#include "libm_support.h"
+
+//
+// general registers used:
+//
+
+nearbyint_GR_signexp = r14 // value read by getf.exp from the normalized input
+nearbyint_GR_exponent = r15 // signexp masked with the 0x1FFFF mask
+nearbyint_GR_17ones = r16 // 0x1FFFF mask
+nearbyint_GR_10033 = r17 // 0x10033: exponent threshold for "already an integer"
+nearbyint_GR_fpsr = r18 // copy of ar40 (fpsr)
+nearbyint_GR_rcs0 = r19 // fpsr masked with 0x0c00; nonzero when not round to nearest
+nearbyint_GR_rcs0_mask = r20 // 0x0c00 mask applied to the fpsr copy
+
+
+// predicate registers used:
+// p6-11
+
+// floating-point registers used:
+
+NEARBYINT_NORM_f8 = f9 // normalized input
+NEARBYINT_FLOAT_INT_f8 = f10 // integer result converted back to floating point
+NEARBYINT_INT_f8 = f11 // input converted to integer format
+
+// Overview of operation
+//==============================================================
+
+// double nearbyint(double x)
+// Return an integer value (represented as a double) that is x rounded to integer in current
+// rounding mode
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global nearbyint#
+
+.section .text
+.proc nearbyint#
+.align 32
+
+
+nearbyint:
+
+{ .mfi
+ mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fcvt.fx.s1 NEARBYINT_INT_f8 = f8 // Convert input to integer format
+ addl nearbyint_GR_10033 = 0x10033, r0
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 NEARBYINT_NORM_f8 = f8
+ mov nearbyint_GR_17ones = 0x1FFFF
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p0 = f8, 0xe7 // p6 if x is nan, inf, or zero
+ mov nearbyint_GR_rcs0_mask = 0x0c00
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm.d f8 = f8
+(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 // Convert the integer back to floating point
+ nop.i 999
+;;
+}
+
+{ .mfi
+ getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8
+ fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal flag
+ nop.i 999
+;;
+}
+
+
+{ .mii
+ nop.m 999
+ nop.i 999
+ and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones
+;;
+}
+
+{ .mmi
+ cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033 // p7 if input is already an integer, else p6
+ and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr
+ nop.i 999
+;;
+}
+
+// Check to see if s0 rounding mode is round to nearest. If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+L(NEARBYINT_COMMON):
+{ .mfb
+ cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
+(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0
+(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fnorm.d.s0 f8 = f8
+ nop.i 999
+;;
+}
+
+// If result is zero, merge sign of input
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8, NEARBYINT_FLOAT_INT_f8
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p10) fnorm.d f8 = NEARBYINT_FLOAT_INT_f8 // Result is nonzero: return it
+ br.ret.sptk b0
+;;
+}
+
+
+L(NEARBYINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
+{ .mfi
+ mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
+ fsetc.s2 0x7f, 0x40
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.fx.s2 NEARBYINT_INT_f8 = f8 // Repeat the integer conversion using s2
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+ fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
+ br.cond.sptk L(NEARBYINT_COMMON)
+;;
+}
+
+
+.endp nearbyint
+ASM_SIZE_DIRECTIVE(nearbyint)
diff --git a/sysdeps/ia64/fpu/s_nearbyintf.S b/sysdeps/ia64/fpu/s_nearbyintf.S
new file mode 100644
index 0000000..02806e3
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_nearbyintf.S
@@ -0,0 +1,221 @@
+.file "nearbyintf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 10/19/2000: Created
+// 2/08/01 Corrected behavior for all rounding modes.
+//==============================================================
+//
+// API
+//==============================================================
+// float nearbyintf(float x)
+
+#include "libm_support.h"
+
+//
+// general registers used:
+//
+
+nearbyint_GR_signexp = r14 // value read by getf.exp from the normalized input
+nearbyint_GR_exponent = r15 // signexp masked with the 0x1FFFF mask
+nearbyint_GR_17ones = r16 // 0x1FFFF mask
+nearbyint_GR_10033 = r17 // NOTE: loaded with 0x10016 in this file, despite the name
+nearbyint_GR_fpsr = r18 // copy of ar40 (fpsr)
+nearbyint_GR_rcs0 = r19 // fpsr masked with 0x0c00; nonzero when not round to nearest
+nearbyint_GR_rcs0_mask = r20 // 0x0c00 mask applied to the fpsr copy
+
+
+// predicate registers used:
+// p6-11
+
+// floating-point registers used:
+
+NEARBYINT_NORM_f8 = f9 // normalized input
+NEARBYINT_FLOAT_INT_f8 = f10 // integer result converted back to floating point
+NEARBYINT_INT_f8 = f11 // input converted to integer format
+
+// Overview of operation
+//==============================================================
+
+// float nearbyintf(float x)
+// Return an integer value (represented as a float) that is x rounded to integer in current
+// rounding mode
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global nearbyintf#
+
+.section .text
+.proc nearbyintf#
+.align 32
+
+
+nearbyintf:
+
+{ .mfi
+ mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fcvt.fx.s1 NEARBYINT_INT_f8 = f8 // Convert input to integer format
+ addl nearbyint_GR_10033 = 0x10016, r0
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 NEARBYINT_NORM_f8 = f8
+ mov nearbyint_GR_17ones = 0x1FFFF
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p0 = f8, 0xe7 // p6 if x is nan, inf, or zero
+ mov nearbyint_GR_rcs0_mask = 0x0c00
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm.s f8 = f8
+(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 // Convert the integer back to floating point
+ nop.i 999
+;;
+}
+
+{ .mfi
+ getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8
+ fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal flag
+ nop.i 999
+;;
+}
+
+
+{ .mii
+ nop.m 999
+ nop.i 999
+ and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones
+;;
+}
+
+{ .mmi
+ cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033 // p7 if input is already an integer, else p6
+ and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr
+ nop.i 999
+;;
+}
+
+// Check to see if s0 rounding mode is round to nearest. If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+L(NEARBYINT_COMMON):
+{ .mfb
+ cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
+(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0
+(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fnorm.s.s0 f8 = f8
+ nop.i 999
+;;
+}
+
+// If result is zero, merge sign of input
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8, NEARBYINT_FLOAT_INT_f8
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p10) fnorm.s f8 = NEARBYINT_FLOAT_INT_f8 // Result is nonzero: return it
+ br.ret.sptk b0
+;;
+}
+
+
+L(NEARBYINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
+{ .mfi
+ mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
+ fsetc.s2 0x7f, 0x40
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.fx.s2 NEARBYINT_INT_f8 = f8 // Repeat the integer conversion using s2
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+ fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
+ br.cond.sptk L(NEARBYINT_COMMON)
+;;
+}
+
+
+.endp nearbyintf
+ASM_SIZE_DIRECTIVE(nearbyintf)
diff --git a/sysdeps/ia64/fpu/s_nearbyintl.S b/sysdeps/ia64/fpu/s_nearbyintl.S
new file mode 100644
index 0000000..df935d5
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_nearbyintl.S
@@ -0,0 +1,218 @@
+.file "nearbyintl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 10/19/2000: Created
+// 2/08/01 Corrected behavior for all rounding modes.
+//==============================================================
+//
+// API
+//==============================================================
+// long double nearbyintl(long double x)
+
+#include "libm_support.h"
+
+//
+// general registers used:
+//
+
+nearbyint_GR_signexp = r14 // getf.exp result for the normalized input
+nearbyint_GR_exponent = r15 // signexp ANDed with the 0x1FFFF mask
+nearbyint_GR_17ones = r16 // constant 0x1FFFF
+nearbyint_GR_10033 = r17 // integer-threshold exponent; this file loads 0x1003e, not 0x10033
+nearbyint_GR_fpsr = r18 // copy of ar40 (fpsr)
+nearbyint_GR_rcs0 = r19 // fpsr ANDed with rcs0_mask; nonzero means s0 is not round to nearest
+nearbyint_GR_rcs0_mask = r20 // constant 0x0c00
+
+
+// predicate registers used:
+// p6-11
+
+// floating-point registers used:
+
+NEARBYINT_NORM_f8 = f9 // normalized copy of the input
+NEARBYINT_FLOAT_INT_f8 = f10 // integer conversion of the input, converted back to floating point
+NEARBYINT_INT_f8 = f11 // input converted to integer
+NEARBYINT_SIGNED_FLOAT_INT_f8 = f12 // FLOAT_INT with the sign of the input merged in
+
+// Overview of operation
+//==============================================================
+
+// long double nearbyintl(long double x)
+// Return an integer value (represented as a long double) that is
+// x rounded to integer in current rounding mode
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global nearbyintl#
+
+.section .text
+.proc nearbyintl#
+.align 32
+// nearbyintl: the input x is read from f8 and the result is written back to f8.
+// Scratch: r14-r20, f9-f12, predicates p6, p7, p8 and p11.
+nearbyintl:
+
+{ .mfi
+ mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fcvt.fx.s1 NEARBYINT_INT_f8 = f8 // First-try conversion of x to integer under status field s1
+ addl nearbyint_GR_10033 = 0x1003e, r0 // Exponent at or above which x is already an integer (see overview)
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 NEARBYINT_NORM_f8 = f8 // Normalized copy of x; only its exponent is read below
+ mov nearbyint_GR_17ones = 0x1FFFF // Mask for the low 17 bits of the getf.exp result
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6,p0 = f8, 0xe7 // p6 = x is NaN, infinity or zero (mask 0xe7, see overview)
+ mov nearbyint_GR_rcs0_mask = 0x0c00 // Mask ANDed with the fpsr below to isolate rc.s0
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm f8 = f8
+(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 // Convert the integer back to floating point
+ nop.i 999
+;;
+}
+
+{ .mfi
+ getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8
+ fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
+ nop.i 999
+;;
+}
+
+
+{ .mii
+ nop.m 999
+ nop.i 999
+ and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones // Keep the low 17 bits only
+;;
+}
+
+{ .mmi
+ cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033 // p7 = x already integral, p6 = x needs rounding
+ and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr // Nonzero when s0 is not round to nearest
+ nop.i 999
+;;
+}
+
+// Check to see if s0 rounding mode is round to nearest. If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+// Must merge the original sign for cases where the result is zero or the input
+// is the largest that still has a fraction (0x1007dfffffffffff)
+L(NEARBYINT_COMMON):
+{ .mfb
+ cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
+(p6) fmerge.s NEARBYINT_SIGNED_FLOAT_INT_f8 = f8, NEARBYINT_FLOAT_INT_f8 // Sign of x merged onto the converted value
+(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fnorm.s0 f8 = f8 // Already integral: the result is x itself
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm f8 = NEARBYINT_SIGNED_FLOAT_INT_f8 // Otherwise the result is the sign-merged rounded value
+ br.ret.sptk b0
+;;
+}
+
+
+L(NEARBYINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
+{ .mfi
+ mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
+ fsetc.s2 0x7f, 0x40 // NOTE(review): confirm the exact effect of the 0x7f/0x40 operands against the ISA manual
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.fx.s2 NEARBYINT_INT_f8 = f8 // Redo the integer conversion, this time under s2
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+ fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 // Refresh the floating-point copy
+ br.cond.sptk L(NEARBYINT_COMMON) // Rejoin; rcs0 is now zero so the common path falls through
+;;
+}
+
+
+.endp nearbyintl
+ASM_SIZE_DIRECTIVE(nearbyintl)
diff --git a/sysdeps/ia64/fpu/s_rint.S b/sysdeps/ia64/fpu/s_rint.S
new file mode 100644
index 0000000..fd99e8e
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_rint.S
@@ -0,0 +1,241 @@
+.file "rint.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 2/08/01 Corrected behavior for all rounding modes.
+//
+// API
+//==============================================================
+// double rint(double x)
+
+#include "libm_support.h"
+
+//
+// general registers used:
+//
+rint_GR_FFFF = r14 // constant -1, moved into the significand of RINT_FFFF
+rint_GR_signexp = r15 // getf.exp result for the normalized input
+rint_GR_exponent = r16 // signexp ANDed with the 0x1FFFF mask
+rint_GR_17ones = r17 // constant 0x1FFFF
+rint_GR_10033 = r18 // integer-threshold exponent 0x10033
+rint_GR_fpsr = r19 // copy of ar40 (fpsr)
+rint_GR_rcs0 = r20 // fpsr ANDed with rcs0_mask; nonzero means s0 is not round to nearest
+rint_GR_rcs0_mask = r21 // constant 0x0c00
+
+
+// predicate registers used:
+// p6-11
+
+// floating-point registers used:
+
+RINT_NORM_f8 = f9 // normalized copy of the input
+RINT_FFFF = f10 // operand of the dummy multiply that sets inexact
+RINT_INEXACT = f11 // throwaway destination of the dummy multiply
+RINT_FLOAT_INT_f8 = f12 // integer conversion of the input, converted back to floating point
+RINT_INT_f8 = f13 // input converted to integer
+
+// Overview of operation
+//==============================================================
+
+// double rint(double x)
+// Return an integer value (represented as a double) that is x rounded to integer in current
+// rounding mode
+// Inexact is set if x != rint(x)
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global rint#
+
+.section .text
+.proc rint#
+.align 32
+// rint: the input x is read from f8 and the result is written back to f8.
+// Scratch: r14-r21, f9-f13, predicates p6-p11.
+rint:
+#ifdef _LIBC
+.global __rint
+.type __rint,@function
+__rint:
+#endif
+
+{ .mfi
+ mov rint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fcvt.fx.s1 RINT_INT_f8 = f8 // First-try conversion of x to integer under status field s1
+ addl rint_GR_10033 = 0x10033, r0 // Exponent at or above which a double is already an integer (see overview)
+}
+{ .mfi
+ mov rint_GR_FFFF = -1 // All-ones pattern for the inexact-raising operand
+ fnorm.s1 RINT_NORM_f8 = f8 // Normalized copy of x
+ mov rint_GR_17ones = 0x1FFFF // Mask for the low 17 bits of the getf.exp result
+;;
+}
+
+{ .mfi
+ setf.sig RINT_FFFF = rint_GR_FFFF // Significand of RINT_FFFF := all ones
+ fclass.m.unc p6,p0 = f8, 0xe7 // p6 = x is NaN, infinity or zero (mask 0xe7, see overview)
+ mov rint_GR_rcs0_mask = 0x0c00 // Mask ANDed with the fpsr below to isolate rc.s0
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm.d f8 = f8
+(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8 // Convert the integer back to floating point
+ nop.i 999
+;;
+}
+
+{ .mfi
+ getf.exp rint_GR_signexp = RINT_NORM_f8
+ fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
+ nop.i 999
+;;
+}
+
+
+{ .mii
+ nop.m 999
+ nop.i 999
+ and rint_GR_exponent = rint_GR_signexp, rint_GR_17ones // Keep the low 17 bits only
+;;
+}
+
+{ .mmi
+ cmp.ge.unc p7,p6 = rint_GR_exponent, rint_GR_10033 // p7 = x already integral, p6 = x needs rounding
+ and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr // Nonzero when s0 is not round to nearest
+ nop.i 999
+;;
+}
+
+// Check to see if s0 rounding mode is round to nearest. If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+L(RINT_COMMON):
+{ .mfb
+ cmp.ne p11,p0 = rint_GR_rcs0, r0
+(p6) fclass.m.unc p9,p10 = RINT_FLOAT_INT_f8, 0x07 // Test for result=0
+(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc.s1 p0,p8 = RINT_FLOAT_INT_f8, RINT_NORM_f8 // p8 = rounded value differs from x, so inexact is due
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fnorm.d.s0 f8 = f8 // Already integral: the result is x itself
+ nop.i 999
+;;
+}
+
+// If result is zero, merge sign of input
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8, RINT_FLOAT_INT_f8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fnorm.d f8 = RINT_FLOAT_INT_f8 // Nonzero result: the rounded value in double precision
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fmpy.s0 RINT_INEXACT = RINT_FFFF,RINT_FFFF // Dummy to set inexact
+ br.ret.sptk b0
+;;
+}
+
+L(RINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
+{ .mfi
+ mov rint_GR_rcs0 = r0 // Clear so we don't come back here
+ fsetc.s2 0x7f, 0x40 // NOTE(review): confirm the exact effect of the 0x7f/0x40 operands against the ISA manual
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.fx.s2 RINT_INT_f8 = f8 // Redo the integer conversion, this time under s2
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+ fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8 // Refresh the floating-point copy
+ br.cond.sptk L(RINT_COMMON) // Rejoin; rcs0 is now zero so the common path falls through
+;;
+}
+
+
+.endp rint
+ASM_SIZE_DIRECTIVE(rint)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__rint)
+#endif
diff --git a/sysdeps/ia64/fpu/s_rintf.S b/sysdeps/ia64/fpu/s_rintf.S
new file mode 100644
index 0000000..78742dc
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_rintf.S
@@ -0,0 +1,241 @@
+.file "rintf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 2/08/01 Corrected behavior for all rounding modes.
+//
+// API
+//==============================================================
+// float rintf(float x)
+
+#include "libm_support.h"
+
+//
+// general registers used:
+//
+rint_GR_FFFF = r14 // constant -1, moved into the significand of RINT_FFFF
+rint_GR_signexp = r15 // getf.exp result for the normalized input
+rint_GR_exponent = r16 // signexp ANDed with the 0x1FFFF mask
+rint_GR_17ones = r17 // constant 0x1FFFF
+rint_GR_10033 = r18 // integer-threshold exponent; this file loads 0x10016, not 0x10033
+rint_GR_fpsr = r19 // copy of ar40 (fpsr)
+rint_GR_rcs0 = r20 // fpsr ANDed with rcs0_mask; nonzero means s0 is not round to nearest
+rint_GR_rcs0_mask = r21 // constant 0x0c00
+
+
+// predicate registers used:
+// p6-11
+
+// floating-point registers used:
+
+RINT_NORM_f8 = f9 // normalized copy of the input
+RINT_FFFF = f10 // operand of the dummy multiply that sets inexact
+RINT_INEXACT = f11 // throwaway destination of the dummy multiply
+RINT_FLOAT_INT_f8 = f12 // integer conversion of the input, converted back to floating point
+RINT_INT_f8 = f13 // input converted to integer
+
+// Overview of operation
+//==============================================================
+
+// float rintf(float x)
+// Return an integer value (represented as a float) that is x rounded to integer in current
+// rounding mode
+// Inexact is set if x != rintf(x)
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global rintf#
+
+.section .text
+.proc rintf#
+.align 32
+// rintf: the input x is read from f8 and the result is written back to f8.
+// Scratch: r14-r21, f9-f13, predicates p6-p11.
+rintf:
+#ifdef _LIBC
+.global __rintf
+.type __rintf,@function
+__rintf:
+#endif
+
+{ .mfi
+ mov rint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fcvt.fx.s1 RINT_INT_f8 = f8 // First-try conversion of x to integer under status field s1
+ addl rint_GR_10033 = 0x10016, r0 // Exponent at or above which a single is already an integer (see overview)
+}
+{ .mfi
+ mov rint_GR_FFFF = -1 // All-ones pattern for the inexact-raising operand
+ fnorm.s1 RINT_NORM_f8 = f8 // Normalized copy of x
+ mov rint_GR_17ones = 0x1FFFF // Mask for the low 17 bits of the getf.exp result
+;;
+}
+
+{ .mfi
+ setf.sig RINT_FFFF = rint_GR_FFFF // Significand of RINT_FFFF := all ones
+ fclass.m.unc p6,p0 = f8, 0xe7 // p6 = x is NaN, infinity or zero (mask 0xe7, see overview)
+ mov rint_GR_rcs0_mask = 0x0c00 // Mask ANDed with the fpsr below to isolate rc.s0
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm.s f8 = f8
+(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8 // Convert the integer back to floating point
+ nop.i 999
+;;
+}
+
+{ .mfi
+ getf.exp rint_GR_signexp = RINT_NORM_f8
+ fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
+ nop.i 999
+;;
+}
+
+
+{ .mii
+ nop.m 999
+ nop.i 999
+ and rint_GR_exponent = rint_GR_signexp, rint_GR_17ones // Keep the low 17 bits only
+;;
+}
+
+{ .mmi
+ cmp.ge.unc p7,p6 = rint_GR_exponent, rint_GR_10033 // p7 = x already integral, p6 = x needs rounding
+ and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr // Nonzero when s0 is not round to nearest
+ nop.i 999
+;;
+}
+
+// Check to see if s0 rounding mode is round to nearest. If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+L(RINT_COMMON):
+{ .mfb
+ cmp.ne p11,p0 = rint_GR_rcs0, r0
+(p6) fclass.m.unc p9,p10 = RINT_FLOAT_INT_f8, 0x07 // Test for result=0
+(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc.s1 p0,p8 = RINT_FLOAT_INT_f8, RINT_NORM_f8 // p8 = rounded value differs from x, so inexact is due
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fnorm.s.s0 f8 = f8 // Already integral: the result is x itself
+ nop.i 999
+;;
+}
+
+// If result is zero, merge sign of input
+{ .mfi
+ nop.m 999
+(p9) fmerge.s f8 = f8, RINT_FLOAT_INT_f8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fnorm.s f8 = RINT_FLOAT_INT_f8 // Nonzero result: the rounded value in single precision
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fmpy.s0 RINT_INEXACT = RINT_FFFF,RINT_FFFF // Dummy to set inexact
+ br.ret.sptk b0
+;;
+}
+
+L(RINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
+{ .mfi
+ mov rint_GR_rcs0 = r0 // Clear so we don't come back here
+ fsetc.s2 0x7f, 0x40 // NOTE(review): confirm the exact effect of the 0x7f/0x40 operands against the ISA manual
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.fx.s2 RINT_INT_f8 = f8 // Redo the integer conversion, this time under s2
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+ fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8 // Refresh the floating-point copy
+ br.cond.sptk L(RINT_COMMON) // Rejoin; rcs0 is now zero so the common path falls through
+;;
+}
+
+
+.endp rintf
+ASM_SIZE_DIRECTIVE(rintf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__rintf)
+#endif
diff --git a/sysdeps/ia64/fpu/s_rintl.S b/sysdeps/ia64/fpu/s_rintl.S
new file mode 100644
index 0000000..9bf7492
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_rintl.S
@@ -0,0 +1,239 @@
+.file "rintl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 5/24/00 Fixed case of 2^63 - 1 + 0.5 (0x1007dffffffffffffffff)
+// 2/08/01 Corrected behavior for all rounding modes.
+//
+// API
+//==============================================================
+// long double rintl(long double x)
+
+#include "libm_support.h"
+
+//
+// general registers used:
+//
+rint_GR_FFFF = r14 // constant -1, moved into the significand of RINT_FFFF
+rint_GR_signexp = r15 // getf.exp result for the normalized input
+rint_GR_exponent = r16 // signexp ANDed with the 0x1FFFF mask
+rint_GR_17ones = r17 // constant 0x1FFFF
+rint_GR_10033 = r18 // integer-threshold exponent; this file loads 0x1003e, not 0x10033
+rint_GR_fpsr = r19 // copy of ar40 (fpsr)
+rint_GR_rcs0 = r20 // fpsr ANDed with rcs0_mask; nonzero means s0 is not round to nearest
+rint_GR_rcs0_mask = r21 // constant 0x0c00
+
+
+// predicate registers used:
+// p6-11
+
+// floating-point registers used:
+
+RINT_NORM_f8 = f9 // normalized copy of the input
+RINT_FFFF = f10 // operand of the dummy multiply that sets inexact
+RINT_INEXACT = f11 // throwaway destination of the dummy multiply
+RINT_FLOAT_INT_f8 = f12 // integer conversion of the input, converted back to floating point
+RINT_INT_f8 = f13 // input converted to integer
+RINT_SIGNED_FLOAT_INT_f8 = f14 // FLOAT_INT with the sign of the input merged in
+
+// Overview of operation
+//==============================================================
+
+// long double rintl(long double x)
+// Return an integer value (represented as a long double) that is x rounded to integer in current
+// rounding mode
+// Inexact is set if x != rintl(x)
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
+
+// double_extended
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 10033 => 34(true) = 52(decimal)
+// 34 + 3ff = 433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global rintl#
+
+.section .text
+.proc rintl#
+.align 32
+// rintl: the input x is read from f8 and the result is written back to f8.
+// Scratch: r14-r21, f9-f14, predicates p6, p7, p8 and p11.
+rintl:
+#ifdef _LIBC
+.global __rintl
+.type __rintl,@function
+__rintl:
+#endif
+
+{ .mfi
+ mov rint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fcvt.fx.s1 RINT_INT_f8 = f8 // First-try conversion of x to integer under status field s1
+ addl rint_GR_10033 = 0x1003e, r0 // Exponent at or above which a double-extended is already an integer (see overview)
+}
+{ .mfi
+ mov rint_GR_FFFF = -1 // All-ones pattern for the inexact-raising operand
+ fnorm.s1 RINT_NORM_f8 = f8 // Normalized copy of x
+ mov rint_GR_17ones = 0x1FFFF // Mask for the low 17 bits of the getf.exp result
+;;
+}
+
+{ .mfi
+ setf.sig RINT_FFFF = rint_GR_FFFF // Significand of RINT_FFFF := all ones
+ fclass.m.unc p6,p0 = f8, 0xe7 // p6 = x is NaN, infinity or zero (mask 0xe7, see overview)
+ mov rint_GR_rcs0_mask = 0x0c00 // Mask ANDed with the fpsr below to isolate rc.s0
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p6) fnorm f8 = f8
+(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8 // Convert the integer back to floating point
+ nop.i 999
+;;
+}
+
+{ .mfi
+ getf.exp rint_GR_signexp = RINT_NORM_f8
+ fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
+ nop.i 999
+;;
+}
+
+
+{ .mii
+ nop.m 999
+ nop.i 999
+ and rint_GR_exponent = rint_GR_signexp, rint_GR_17ones // Keep the low 17 bits only
+;;
+}
+
+{ .mmi
+ cmp.ge.unc p7,p6 = rint_GR_exponent, rint_GR_10033 // p7 = x already integral, p6 = x needs rounding
+ and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr // Nonzero when s0 is not round to nearest
+ nop.i 999
+;;
+}
+
+// Check to see if s0 rounding mode is round to nearest. If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+// Must merge the original sign for cases where the result is zero or the input
+// is the largest that still has a fraction (0x1007dfffffffffff)
+L(RINT_COMMON):
+{ .mfb
+ cmp.ne p11,p0 = rint_GR_rcs0, r0
+(p6) fmerge.s RINT_SIGNED_FLOAT_INT_f8 = f8, RINT_FLOAT_INT_f8 // Sign of x merged onto the converted value
+(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST) // Branch if not round to nearest
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc.s1 p0,p8 = RINT_FLOAT_INT_f8, RINT_NORM_f8 // p8 = rounded value differs from x, so inexact is due
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fnorm.s0 f8 = f8 // Already integral: the result is x itself
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fnorm f8 = RINT_SIGNED_FLOAT_INT_f8 // Otherwise the result is the sign-merged rounded value
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fmpy.s0 RINT_INEXACT = RINT_FFFF,RINT_FFFF // Dummy to set inexact
+ br.ret.sptk b0
+;;
+}
+
+L(RINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
+{ .mfi
+ mov rint_GR_rcs0 = r0 // Clear so we don't come back here
+ fsetc.s2 0x7f, 0x40 // NOTE(review): confirm the exact effect of the 0x7f/0x40 operands against the ISA manual
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fcvt.fx.s2 RINT_INT_f8 = f8 // Redo the integer conversion, this time under s2
+ nop.i 999
+;;
+}
+
+{ .mfb
+ nop.m 999
+ fcvt.xf RINT_FLOAT_INT_f8 = RINT_INT_f8 // Refresh the floating-point copy
+ br.cond.sptk L(RINT_COMMON) // Rejoin; rcs0 is now zero so the common path falls through
+;;
+}
+
+
+.endp rintl
+ASM_SIZE_DIRECTIVE(rintl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__rintl)
+#endif
diff --git a/sysdeps/ia64/fpu/s_round.S b/sysdeps/ia64/fpu/s_round.S
new file mode 100644
index 0000000..30e8af8
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_round.S
@@ -0,0 +1,236 @@
+.file "round.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 10/25/2000: Created
+//==============================================================
+//
+// API
+//==============================================================
+// double round(double x)
+//
+
+#include "libm_support.h"
+
+// general input registers:
+//
+round_GR_half = r14 // constant 0x0fffe, the exponent used to form +0.5
+round_GR_big = r15 // constant 0x10033, exponent at which the input has no fractional part
+round_GR_expmask = r16 // constant 0x1ffff
+round_GR_signexp = r17 // getf.exp result for the input
+round_GR_exp = r18 // signexp ANDed with expmask
+round_GR_expdiff = r19 // exp minus big
+
+// predicate registers used:
+// p6 - p10
+
+// floating-point registers used:
+
+ROUND_NORM_f8 = f9 // normalized copy of the input
+ROUND_TRUNC_f8 = f10 // input truncated to integer
+ROUND_RINT_f8 = f11 // input rounded to integer under s1
+ROUND_FLOAT_TRUNC_f8 = f12 // TRUNC converted back to floating point
+ROUND_FLOAT_RINT_f8 = f13 // RINT converted back to floating point
+ROUND_REMAINDER = f14 // absolute difference between the input and its truncation
+ROUND_HALF = f15 // constant +0.5
+
+// Overview of operation
+//==============================================================
+
+// double round(double x)
+// Return an integer value (represented as a double) that is x
+// rounded to nearest integer, halfway cases rounded away from
+// zero.
+// if x>0 result = trunc(x+0.5)
+// if x<0 result = trunc(x-0.5)
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// If x is NAN, ZERO, INFINITY, or >= 2^52 then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global round#
+
+.section .text
+.proc round#
+.align 32
+// round: the input x is read from f8 and the result is written back to f8.
+// Scratch: r14-r19, f9-f15, predicates p6-p10.
+round:
+
+// Get exponent for +0.5
+// Truncate x to integer
+{ .mfi
+ addl round_GR_half = 0x0fffe, r0
+ fcvt.fx.trunc.s1 ROUND_TRUNC_f8 = f8
+ nop.i 999
+}
+
+// Get signexp of x
+// Normalize input
+// Form exponent mask
+{ .mfi
+ getf.exp round_GR_signexp = f8
+ fnorm ROUND_NORM_f8 = f8
+ addl round_GR_expmask = 0x1ffff, r0 ;;
+}
+
+// Form +0.5
+// Round x to integer
+{ .mfi
+ setf.exp ROUND_HALF = round_GR_half
+ fcvt.fx.s1 ROUND_RINT_f8 = f8
+ nop.i 999 ;;
+}
+// Get exp of x
+// Test for NAN, INF, ZERO
+// Get exponent at which input has no fractional part
+{ .mfi
+ and round_GR_exp = round_GR_expmask, round_GR_signexp
+ fclass.m p8,p9 = f8,0xe7 // p8 = NaN, infinity or zero; p9 = everything else
+ addl round_GR_big = 0x10033, r0 ;;
+}
+
+// Get exp-bigexp
+// If exp is so big there is no fractional part, then turn on p8, off p9
+{ .mmi
+ sub round_GR_expdiff = round_GR_exp, round_GR_big ;;
+#ifdef _LIBC
+(p9) cmp.lt.or.andcm p8,p9 = r0, round_GR_expdiff // Tests 0 < expdiff. NOTE(review): strict, unlike the #else form; they differ when exp == 0x10033
+#else
+(p9) cmp.ge.or.andcm p8,p9 = round_GR_expdiff, r0 // Tests expdiff >= 0
+#endif
+ nop.i 999 ;;
+}
+
+// Set p6 if x<0, else set p7
+{ .mfi
+ nop.m 999
+(p9) fcmp.lt.unc p6,p7 = f8,f0
+ nop.i 999
+}
+
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
+{ .mfi
+ nop.m 999
+(p8) fnorm.d.s0 f8 = f8
+ nop.i 999 ;;
+}
+
+// Float the truncated integer
+{ .mfi
+ nop.m 999
+(p9) fcvt.xf ROUND_FLOAT_TRUNC_f8 = ROUND_TRUNC_f8
+ nop.i 999 ;;
+}
+
+// Float the rounded integer to get preliminary result
+{ .mfi
+ nop.m 999
+(p9) fcvt.xf ROUND_FLOAT_RINT_f8 = ROUND_RINT_f8
+ nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+// then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+// then result = truncated input + 1.0
+// Else
+// result = rounded input
+// Endif
+{ .mfi
+ nop.m 999
+(p6) fsub.s1 ROUND_REMAINDER = ROUND_FLOAT_TRUNC_f8, ROUND_NORM_f8 // x<0: truncated minus input
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fsub.s1 ROUND_REMAINDER = ROUND_NORM_f8, ROUND_FLOAT_TRUNC_f8 // x>=0 here: input minus truncated
+ nop.i 999 ;;
+}
+
+// Assume preliminary result is rounded integer
+{ .mfi
+ nop.m 999
+(p9) fnorm.d.s0 f8 = ROUND_FLOAT_RINT_f8
+ nop.i 999
+}
+
+// If x<0, test if result=0
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc p10,p0 = ROUND_FLOAT_RINT_f8,f0
+ nop.i 999 ;;
+}
+
+// If x<0 and result=0, set result=-0
+{ .mfi
+ nop.m 999
+(p10) fmerge.ns f8 = f1,f8
+ nop.i 999
+}
+
+// If x<0, test if remainder=0.5
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc p6,p0 = ROUND_REMAINDER, ROUND_HALF // p6 is reused: from here it means x<0 and remainder=0.5
+ nop.i 999 ;;
+}
+
+// If x>0, test if remainder=0.5
+{ .mfi
+ nop.m 999
+(p7) fcmp.eq.unc p7,p0 = ROUND_REMAINDER, ROUND_HALF // p7 is reused: from here it means x>0 and remainder=0.5
+ nop.i 999 ;;
+}
+
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+ nop.m 999
+(p6) fsub.d.s0 f8 = ROUND_FLOAT_TRUNC_f8,f1
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fadd.d.s0 f8 = ROUND_FLOAT_TRUNC_f8,f1
+ br.ret.sptk b0 ;;
+}
+
+.endp round
+ASM_SIZE_DIRECTIVE(round)
diff --git a/sysdeps/ia64/fpu/s_roundf.S b/sysdeps/ia64/fpu/s_roundf.S
new file mode 100644
index 0000000..9aa0d6c
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_roundf.S
@@ -0,0 +1,236 @@
+.file "roundf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 10/25/2000: Created
+//==============================================================
+//
+// API
+//==============================================================
+// float roundf(float x)
+//
+
+#include "libm_support.h"
+
+// general input registers:
+//
+roundf_GR_half = r14 // exponent field used to form +0.5
+roundf_GR_big = r15 // exponent at which the input has no fractional part
+roundf_GR_expmask = r16 // 0x1ffff mask applied to signexp to get the exponent
+roundf_GR_signexp = r17 // sign+exponent of x as read by getf.exp
+roundf_GR_exp = r18 // exponent of x (signexp & expmask)
+roundf_GR_expdiff = r19 // exp - big
+
+// predicate registers used:
+// p6 - p10
+
+// floating-point registers used:
+
+ROUNDF_NORM_f8 = f9 // normalized x
+ROUNDF_TRUNC_f8 = f10 // x truncated to integer (fcvt.fx.trunc)
+ROUNDF_RINT_f8 = f11 // x rounded to integer (fcvt.fx)
+ROUNDF_FLOAT_TRUNC_f8 = f12 // truncated integer converted back with fcvt.xf
+ROUNDF_FLOAT_RINT_f8 = f13 // rounded integer converted back with fcvt.xf
+ROUNDF_REMAINDER = f14 // difference between x and truncated x, tested against 0.5
+ROUNDF_HALF = f15 // +0.5
+
+// Overview of operation
+//==============================================================
+
+// float roundf(float x)
+// Return an integer value (represented as a float) that is x
+// rounded to nearest integer, halfway cases rounded away from
+// zero.
+// if x>0 result = trunc(x+0.5)
+// if x<0 result = trunc(x-0.5)
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// If x is NAN, ZERO, INFINITY, or >= 2^23 then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global roundf#
+
+.section .text
+.proc roundf#
+.align 32
+
+// float roundf(float x): x is read from f8 and the result is returned in f8.
+roundf:
+
+// Get exponent for +0.5
+// Truncate x to integer
+{ .mfi
+ addl roundf_GR_half = 0x0fffe, r0
+ fcvt.fx.trunc.s1 ROUNDF_TRUNC_f8 = f8
+ nop.i 999
+}
+
+// Get signexp of x
+// Normalize input
+// Form exponent mask
+{ .mfi
+ getf.exp roundf_GR_signexp = f8
+ fnorm ROUNDF_NORM_f8 = f8
+ addl roundf_GR_expmask = 0x1ffff, r0 ;;
+}
+
+// Form +0.5
+// Round x to integer
+{ .mfi
+ setf.exp ROUNDF_HALF = roundf_GR_half
+ fcvt.fx.s1 ROUNDF_RINT_f8 = f8
+ nop.i 999 ;;
+}
+// Get exp of x
+// Test for NAN, INF, ZERO (p8 on if special, p9 on otherwise)
+// Get exponent at which input has no fractional part (2^23)
+{ .mfi
+ and roundf_GR_exp = roundf_GR_expmask, roundf_GR_signexp
+ fclass.m p8,p9 = f8,0xe7
+ addl roundf_GR_big = 0x10016, r0 ;;
+}
+
+// Get exp-bigexp
+// If exp-bigexp >= 0 there is no fractional part: turn on p8, off p9
+{ .mmi
+ sub roundf_GR_expdiff = roundf_GR_exp, roundf_GR_big ;;
+#ifdef _LIBC
+(p9) cmp.le.or.andcm p8,p9 = r0, roundf_GR_expdiff
+#else
+(p9) cmp.ge.or.andcm p8,p9 = roundf_GR_expdiff, r0
+#endif
+ nop.i 999 ;;
+}
+
+// Set p6 if x<0, else set p7
+{ .mfi
+ nop.m 999
+(p9) fcmp.lt.unc p6,p7 = f8,f0
+ nop.i 999
+}
+
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
+{ .mfi
+ nop.m 999
+(p8) fnorm.s.s0 f8 = f8
+ nop.i 999 ;;
+}
+
+// Float the truncated integer
+{ .mfi
+ nop.m 999
+(p9) fcvt.xf ROUNDF_FLOAT_TRUNC_f8 = ROUNDF_TRUNC_f8
+ nop.i 999 ;;
+}
+
+// Float the rounded integer to get preliminary result
+{ .mfi
+ nop.m 999
+(p9) fcvt.xf ROUNDF_FLOAT_RINT_f8 = ROUNDF_RINT_f8
+ nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+// then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+// then result = truncated input + 1.0
+// Else
+// result = rounded input
+// Endif
+{ .mfi
+ nop.m 999
+(p6) fsub.s1 ROUNDF_REMAINDER = ROUNDF_FLOAT_TRUNC_f8, ROUNDF_NORM_f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fsub.s1 ROUNDF_REMAINDER = ROUNDF_NORM_f8, ROUNDF_FLOAT_TRUNC_f8
+ nop.i 999 ;;
+}
+
+// Assume preliminary result is rounded integer
+{ .mfi
+ nop.m 999
+(p9) fnorm.s.s0 f8 = ROUNDF_FLOAT_RINT_f8
+ nop.i 999
+}
+
+// If x<0, test if result=0
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc p10,p0 = ROUNDF_FLOAT_RINT_f8,f0
+ nop.i 999 ;;
+}
+
+// If x<0 and result=0, set result=-0
+{ .mfi
+ nop.m 999
+(p10) fmerge.ns f8 = f1,f8
+ nop.i 999
+}
+
+// If x<0, test if remainder=0.5 (p6 stays on only for the halfway case)
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc p6,p0 = ROUNDF_REMAINDER, ROUNDF_HALF
+ nop.i 999 ;;
+}
+
+// If x>0, test if remainder=0.5 (p7 stays on only for the halfway case)
+{ .mfi
+ nop.m 999
+(p7) fcmp.eq.unc p7,p0 = ROUNDF_REMAINDER, ROUNDF_HALF
+ nop.i 999 ;;
+}
+
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+ nop.m 999
+(p6) fsub.s.s0 f8 = ROUNDF_FLOAT_TRUNC_f8,f1
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fadd.s.s0 f8 = ROUNDF_FLOAT_TRUNC_f8,f1
+ br.ret.sptk b0 ;;
+}
+
+.endp roundf
+ASM_SIZE_DIRECTIVE(roundf)
diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S
new file mode 100644
index 0000000..f581d2f
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_roundl.S
@@ -0,0 +1,236 @@
+.file "roundl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 10/25/2000: Created
+//==============================================================
+//
+// API
+//==============================================================
+// long double roundl(long double x)
+//
+
+#include "libm_support.h"
+
+// general input registers:
+//
+roundl_GR_half = r14 // exponent field used to form +0.5
+roundl_GR_big = r15 // exponent at which the input has no fractional part
+roundl_GR_expmask = r16 // 0x1ffff mask applied to signexp to get the exponent
+roundl_GR_signexp = r17 // sign+exponent of x as read by getf.exp
+roundl_GR_exp = r18 // exponent of x (signexp & expmask)
+roundl_GR_expdiff = r19 // exp - big
+
+// predicate registers used:
+// p6 - p10
+
+// floating-point registers used:
+
+ROUNDL_NORM_f8 = f9 // normalized x
+ROUNDL_TRUNC_f8 = f10 // x truncated to integer (fcvt.fx.trunc)
+ROUNDL_RINT_f8 = f11 // x rounded to integer (fcvt.fx)
+ROUNDL_FLOAT_TRUNC_f8 = f12 // truncated integer converted back with fcvt.xf
+ROUNDL_FLOAT_RINT_f8 = f13 // rounded integer converted back with fcvt.xf
+ROUNDL_REMAINDER = f14 // difference between x and truncated x, tested against 0.5
+ROUNDL_HALF = f15 // +0.5
+
+// Overview of operation
+//==============================================================
+
+// long double roundl(long double x)
+// Return an integer value (represented as a long double) that is x
+// rounded to nearest integer, halfway cases rounded away from
+// zero.
+// if x>0 result = trunc(x+0.5)
+// if x<0 result = trunc(x-0.5)
+// *******************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// If x is NAN, ZERO, INFINITY, or >= 2^63 then return
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11 0xe7
+
+
+.align 32
+.global roundl#
+
+.section .text
+.proc roundl#
+.align 32
+
+// long double roundl(long double x): x is read from f8, result returned in f8.
+roundl:
+
+// Get exponent for +0.5
+// Truncate x to integer
+{ .mfi
+ addl roundl_GR_half = 0x0fffe, r0
+ fcvt.fx.trunc.s1 ROUNDL_TRUNC_f8 = f8
+ nop.i 999
+}
+
+// Get signexp of x
+// Normalize input
+// Form exponent mask
+{ .mfi
+ getf.exp roundl_GR_signexp = f8
+ fnorm ROUNDL_NORM_f8 = f8
+ addl roundl_GR_expmask = 0x1ffff, r0 ;;
+}
+
+// Form +0.5
+// Round x to integer
+{ .mfi
+ setf.exp ROUNDL_HALF = roundl_GR_half
+ fcvt.fx.s1 ROUNDL_RINT_f8 = f8
+ nop.i 999 ;;
+}
+// Get exp of x
+// Test for NAN, INF, ZERO (p8 on if special, p9 on otherwise)
+// Get exponent at which input has no fractional part (2^63)
+{ .mfi
+ and roundl_GR_exp = roundl_GR_expmask, roundl_GR_signexp
+ fclass.m p8,p9 = f8,0xe7
+ addl roundl_GR_big = 0x1003e, r0 ;;
+}
+
+// Get exp-bigexp; if it is >= 0 (|x| >= 2^63, too big for the signed 64-bit
+// fcvt.fx results) there is no fractional part, so turn on p8, off p9
+{ .mmi
+ sub roundl_GR_expdiff = roundl_GR_exp, roundl_GR_big ;;
+#ifdef _LIBC
+(p9) cmp.le.or.andcm p8,p9 = r0, roundl_GR_expdiff
+#else
+(p9) cmp.ge.or.andcm p8,p9 = roundl_GR_expdiff, r0
+#endif
+ nop.i 999 ;;
+}
+
+// Set p6 if x<0, else set p7
+{ .mfi
+ nop.m 999
+(p9) fcmp.lt.unc p6,p7 = f8,f0
+ nop.i 999
+}
+
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
+{ .mfi
+ nop.m 999
+(p8) fnorm.s0 f8 = f8
+ nop.i 999 ;;
+}
+
+// Float the truncated integer
+{ .mfi
+ nop.m 999
+(p9) fcvt.xf ROUNDL_FLOAT_TRUNC_f8 = ROUNDL_TRUNC_f8
+ nop.i 999 ;;
+}
+
+// Float the rounded integer to get preliminary result
+{ .mfi
+ nop.m 999
+(p9) fcvt.xf ROUNDL_FLOAT_RINT_f8 = ROUNDL_RINT_f8
+ nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+// then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+// then result = truncated input + 1.0
+// Else
+// result = rounded input
+// Endif
+{ .mfi
+ nop.m 999
+(p6) fsub.s1 ROUNDL_REMAINDER = ROUNDL_FLOAT_TRUNC_f8, ROUNDL_NORM_f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+(p7) fsub.s1 ROUNDL_REMAINDER = ROUNDL_NORM_f8, ROUNDL_FLOAT_TRUNC_f8
+ nop.i 999 ;;
+}
+
+// Assume preliminary result is rounded integer
+{ .mfi
+ nop.m 999
+(p9) fnorm.s0 f8 = ROUNDL_FLOAT_RINT_f8
+ nop.i 999
+}
+
+// If x<0, test if result=0
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc p10,p0 = ROUNDL_FLOAT_RINT_f8,f0
+ nop.i 999 ;;
+}
+
+// If x<0 and result=0, set result=-0
+{ .mfi
+ nop.m 999
+(p10) fmerge.ns f8 = f1,f8
+ nop.i 999
+}
+
+// If x<0, test if remainder=0.5 (p6 stays on only for the halfway case)
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.unc p6,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
+ nop.i 999 ;;
+}
+
+// If x>0, test if remainder=0.5 (p7 stays on only for the halfway case)
+{ .mfi
+ nop.m 999
+(p7) fcmp.eq.unc p7,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
+ nop.i 999 ;;
+}
+
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+ nop.m 999
+(p6) fsub.s0 f8 = ROUNDL_FLOAT_TRUNC_f8,f1
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fadd.s0 f8 = ROUNDL_FLOAT_TRUNC_f8,f1
+ br.ret.sptk b0 ;;
+}
+
+.endp roundl
+ASM_SIZE_DIRECTIVE(roundl)
diff --git a/sysdeps/ia64/fpu/s_scalbn.S b/sysdeps/ia64/fpu/s_scalbn.S
new file mode 100644
index 0000000..caedffd
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_scalbn.S
@@ -0,0 +1,366 @@
+.file "scalbn.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 Scalbn completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double = scalbn (double x, int n)
+// input floating point f8 and int n (r33)
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Big = f6 // 2**40000 first, later the positive overflow threshold
+FR_NBig = f7 // 2**-40000 first, later the negative overflow threshold
+FR_Floating_X = f8 // input x
+FR_Result = f8 // result (shares f8 with the input)
+FR_Result2 = f9 // product computed under status field s2
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // normalized x
+FR_Two_N = f14 // scale factor 2**N
+FR_Two_to_Big = f15 // not referenced by the code in this file
+
+GR_N_Biased = r15 // N plus the exponent bias
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18 // exponent constants for setf.exp
+GR_Scratch1 = r19 // exponent constants for setf.exp
+GR_Bias = r20 // exponent bias 0x0FFFF
+GR_N_as_int = r21 // N sign-extended from 32 bits
+
+GR_SAVE_B0 = r32 // error path: saved b0
+GR_SAVE_GP = r33 // error path: saved gp
+GR_SAVE_PFS = r34 // error path: saved ar.pfs
+GR_Parameter_X = r35 // error path: address of x on the stack
+GR_Parameter_Y = r36 // error path: address of N on the stack
+GR_Parameter_RESULT = r37 // error path: address of the result on the stack
+GR_Tag = r38 // error tag (176 or 177)
+
+.align 32
+.global scalbn
+
+.section .text
+.proc scalbn
+.align 32
+
+scalbn:
+
+// scalbn entry: x is in f8 and n in r33; the result is returned in f8.
+// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
+//
+{ .mfi
+ alloc r32=ar.pfs,1,2,4,0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Bias = 0x0FFFF,r0
+}
+
+//
+// Sign extend input
+// Is N zero?
+// Normalize x
+//
+{ .mfi
+ cmp.eq.unc p6,p0 = r33,r0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ sxt4 GR_N_as_int = r33
+}
+;;
+
+//
+// Add the bias to N
+// Branch and return special values.
+// Create -35000
+// Create 35000
+//
+{ .mfi
+ addl GR_Big = 35000,r0
+ nop.f 0
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{ .mfb
+ addl GR_NBig = -35000,r0
+(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Build 2**N; load the exponents of 2**-40000 (0x063BF) and 2**40000 (0x019C3F)
+// Return x when N = 0
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+ addl GR_Scratch1 = 0x063BF,r0
+}
+{ .mfb
+ addl GR_Scratch = 0x019C3F,r0
+(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
+(p6) br.ret.spnt b0
+};;
+
+//
+// Create 2**big
+// Create 2**-big
+// Is N >= 35000
+// Is N <= -35000
+// Raise Denormal operand flag with compare
+// Main path, create 2**N
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch1
+ nop.f 0
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+}
+{ .mfi
+ setf.exp FR_Big = GR_Scratch
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x00000000000303FF
+};;
+
+
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x00000000000103FF
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999
+};;
+
+//
+// Do final operation under s0, s3 and s2; reload FR_NBig/FR_Big with the
+// overflow thresholds -2**1024 (0x303FF) and +2**1024 (0x103FF)
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+};;
+
+//
+// Is the s3 result zero? Is the s2 result >= FR_Big?
+// Preload the error tag with 176
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 176, r0
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+ nop.i 999
+};;
+
+//
+// Is the s2 result <= FR_NBig (negative overflow)?
+// Branch out for underflow with tag 177
+//
+{ .mfb
+(p6) addl GR_Tag = 177, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt L(SCALBN_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(SCALBN_OVERFLOW)
+(p9) br.cond.spnt L(SCALBN_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+.endp scalbn
+ASM_SIZE_DIRECTIVE(scalbn)
+.proc __libm_error_region
+__libm_error_region:
+
+L(SCALBN_OVERFLOW):
+L(SCALBN_UNDERFLOW):
+
+// Error exit for scalbn: hand x, N, the result and GR_Tag to
+// __libm_error_support.  Get stack address of N
+// (once sp is lowered by 64: x at sp+16, N at sp+32, result at sp+48)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Store the result and get its address; point Parameter_Y back at N.
+//
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_Result
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfd FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_scalbnf.S b/sysdeps/ia64/fpu/s_scalbnf.S
new file mode 100644
index 0000000..a68e82d
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_scalbnf.S
@@ -0,0 +1,366 @@
+//.file "scalbnf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 scalbnf completely reworked and now standalone version
+//
+// API
+//==============================================================
+// float = scalbnf (float x, int n)
+// input floating point f8 and int n (r33)
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Big = f6 // 2**40000 first, later the positive overflow threshold
+FR_NBig = f7 // 2**-40000 first, later the negative overflow threshold
+FR_Floating_X = f8 // input x
+FR_Result = f8 // result (shares f8 with the input)
+FR_Result2 = f9 // product computed under status field s2
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // normalized x
+FR_Two_N = f14 // scale factor 2**N
+FR_Two_to_Big = f15 // not referenced by the code in this file
+
+GR_N_Biased = r15 // N plus the exponent bias
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18 // exponent constants for setf.exp
+GR_Scratch1 = r19 // exponent constants for setf.exp
+GR_Bias = r20 // exponent bias 0x0FFFF
+GR_N_as_int = r21 // N sign-extended from 32 bits
+
+GR_SAVE_B0 = r32 // error path: saved b0
+GR_SAVE_GP = r33 // error path: saved gp
+GR_SAVE_PFS = r34 // error path: saved ar.pfs
+GR_Parameter_X = r35 // error path: address of x on the stack
+GR_Parameter_Y = r36 // error path: address of N on the stack
+GR_Parameter_RESULT = r37 // error path: address of the result on the stack
+GR_Tag = r38 // error tag (178 or 179)
+
+.align 32
+.global scalbnf
+
+.section .text
+.proc scalbnf
+.align 32
+
+scalbnf:
+
+// scalbnf entry: x is in f8 and n in r33; the result is returned in f8.
+// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
+//
+{ .mfi
+ alloc r32=ar.pfs,1,2,4,0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Bias = 0x0FFFF,r0
+}
+
+//
+// Sign extend input
+// Is N zero?
+// Normalize x
+//
+{ .mfi
+ cmp.eq.unc p6,p0 = r33,r0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ sxt4 GR_N_as_int = r33
+}
+;;
+
+//
+// Add the bias to N
+// Branch and return special values.
+// Create -35000
+// Create 35000
+//
+{ .mfi
+ addl GR_Big = 35000,r0
+ nop.f 0
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{ .mfb
+ addl GR_NBig = -35000,r0
+(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Build 2**N; load the exponents of 2**-40000 (0x063BF) and 2**40000 (0x019C3F)
+// Return x when N = 0
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+ addl GR_Scratch1 = 0x063BF,r0
+}
+{ .mfb
+ addl GR_Scratch = 0x019C3F,r0
+(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
+(p6) br.ret.spnt b0
+};;
+
+//
+// Create 2**big
+// Create 2**-big
+// Is N >= 35000
+// Is N <= -35000
+// Raise Denormal operand flag with compare
+// Main path, create 2**N
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch1
+ nop.f 0
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+}
+{ .mfi
+ setf.exp FR_Big = GR_Scratch
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x000000000003007F
+};;
+
+
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x000000000001007F
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999
+};;
+
+//
+// Do final operation under s0, s3 and s2; reload FR_NBig/FR_Big with the
+// overflow thresholds -2**128 (0x3007F) and +2**128 (0x1007F)
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+};;
+
+//
+// Is the s3 result zero? Is the s2 result >= FR_Big?
+// Preload the error tag with 178
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 178, r0
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+ nop.i 999
+};;
+
+//
+// Is the s2 result <= FR_NBig (negative overflow)?
+// Branch out for underflow with tag 179
+//
+{ .mfb
+(p6) addl GR_Tag = 179, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt L(scalbnf_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(scalbnf_OVERFLOW)
+(p9) br.cond.spnt L(scalbnf_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+.endp scalbnf
+ASM_SIZE_DIRECTIVE(scalbnf)
+.proc __libm_error_region
+__libm_error_region:
+
+L(scalbnf_OVERFLOW):
+L(scalbnf_UNDERFLOW):
+
+// Error exit for scalbnf: hand x, N, the result and GR_Tag to
+// __libm_error_support.  Get stack address of N
+// (once sp is lowered by 64: x at sp+16, N at sp+32, result at sp+48)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Store the result and get its address; point Parameter_Y back at N.
+//
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_Result
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfs FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_scalbnl.S b/sysdeps/ia64/fpu/s_scalbnl.S
new file mode 100644
index 0000000..5f51c02
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_scalbnl.S
@@ -0,0 +1,366 @@
+//.file "scalbnl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 1/26/01 scalbnl completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double-extended = scalbnl (double-extended x, int n)
+// input floating point f8 and int n (r34)
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+
+#include "libm_support.h"
+
+FR_Big = f6 // 2**40000 first, later the positive overflow threshold
+FR_NBig = f7 // 2**-40000 first, later the negative overflow threshold
+FR_Floating_X = f8 // input x
+FR_Result = f8 // result (shares f8 with the input)
+FR_Result2 = f9 // product computed under status field s2
+FR_Result3 = f11 // product computed under status field s3
+FR_Norm_X = f12 // normalized x
+FR_Two_N = f14 // scale factor 2**N
+FR_Two_to_Big = f15 // not referenced by the code in this file
+
+GR_N_Biased = r15 // N plus the exponent bias
+GR_Big = r16 // 35000
+GR_NBig = r17 // -35000
+GR_Scratch = r18 // exponent constants for setf.exp
+GR_Scratch1 = r19 // exponent constants for setf.exp
+GR_Bias = r20 // exponent bias 0x0FFFF
+GR_N_as_int = r21 // N sign-extended from 32 bits
+
+GR_SAVE_B0 = r32 // error path: saved b0
+GR_SAVE_GP = r33 // error path: saved gp
+GR_SAVE_PFS = r34 // error path: saved ar.pfs
+GR_Parameter_X = r35 // error path: address of x on the stack
+GR_Parameter_Y = r36 // error path: address of N on the stack
+GR_Parameter_RESULT = r37 // error path: address of the result on the stack
+GR_Tag = r38 // error tag (174 or 175)
+
+.align 32
+.global scalbnl
+
+.section .text
+.proc scalbnl
+.align 32
+
+scalbnl:
+
+// scalbnl entry: x is in f8 and n in r34; the result is returned in f8.
+// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
+//
+{ .mfi
+ alloc r32=ar.pfs,2,1,4,0
+ fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+ addl GR_Bias = 0x0FFFF,r0
+}
+
+//
+// Sign extend input
+// Is N zero?
+// Normalize x
+//
+{ .mfi
+ cmp.eq.unc p6,p0 = r34,r0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ sxt4 GR_N_as_int = r34
+}
+;;
+
+//
+// Add the bias to N
+// Branch and return special values.
+// Create -35000
+// Create 35000
+//
+{ .mfi
+ addl GR_Big = 35000,r0
+ nop.f 0
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{ .mfb
+ addl GR_NBig = -35000,r0
+(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0
+(p7) br.ret.spnt b0
+};;
+
+//
+// Build 2**N; load the exponents of 2**-40000 (0x063BF) and 2**40000 (0x019C3F)
+// Return x when N = 0
+//
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+ addl GR_Scratch1 = 0x063BF,r0
+}
+{ .mfb
+ addl GR_Scratch = 0x019C3F,r0
+(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
+(p6) br.ret.spnt b0
+};;
+
+//
+// Create 2**big
+// Create 2**-big
+// Is N >= 35000
+// Is N <= -35000
+// Raise Denormal operand flag with compare
+// Main path, create 2**N
+//
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch1
+ nop.f 0
+ cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
+}
+{ .mfi
+ setf.exp FR_Big = GR_Scratch
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
+ cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+// Adjust 2**N if N was very small or very large
+//
+{ .mfi
+ nop.m 0
+(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch = 0x0000000000033FFF
+};;
+
+
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
+ nop.i 0
+}
+{ .mlx
+ nop.m 999
+(p0) movl GR_Scratch1= 0x0000000000013FFF
+};;
+
+// Set up necessary status fields
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ nop.m 999
+(p0) fsetc.s3 0x7F,0x41
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fsetc.s2 0x7F,0x42
+ nop.i 999
+};;
+
+//
+// Do final operation under s0, s3 and s2; reload FR_NBig/FR_Big with the
+// overflow thresholds -2**16384 (0x33FFF) and +2**16384 (0x13FFF)
+{ .mfi
+ setf.exp FR_NBig = GR_Scratch
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+{ .mfi
+ setf.exp FR_Big = GR_Scratch1
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 999
+};;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 999
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+};;
+
+//
+// Is the s3 result zero? Is the s2 result >= FR_Big?
+// Preload the error tag with 174
+{ .mfi
+ nop.m 999
+ fclass.m.unc p6, p0 = FR_Result3, 0x007
+ nop.i 999
+}
+{ .mfi
+ addl GR_Tag = 174, r0
+ fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+};;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+ nop.i 999
+};;
+
+//
+// Is the s2 result <= FR_NBig (negative overflow)?
+// Branch out for underflow with tag 175
+//
+{ .mfb
+(p6) addl GR_Tag = 175, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt L(scalbnl_UNDERFLOW)
+};;
+
+//
+// Branch out for overflow
+//
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt L(scalbnl_OVERFLOW)
+(p9) br.cond.spnt L(scalbnl_OVERFLOW)
+};;
+
+//
+// Return from main path.
+//
+{ .mfb
+ nop.m 999
+ nop.f 0
+ br.ret.sptk b0;;
+}
+
+.endp scalbnl
+ASM_SIZE_DIRECTIVE(scalbnl)
+.proc __libm_error_region
+__libm_error_region:
+
+L(scalbnl_OVERFLOW):
+L(scalbnl_UNDERFLOW):
+
+// Error exit for scalbnl: hand x, N, the result and GR_Tag to
+// __libm_error_support.  Get stack address of N
+// (once sp is lowered by 64: x at sp+16, N at sp+32, result at sp+48)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Store the result and get its address; point Parameter_Y back at N.
+//
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_Result
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack
+//
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfe FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_significand.S b/sysdeps/ia64/fpu/s_significand.S
new file mode 100644
index 0000000..0cbfd42
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_significand.S
@@ -0,0 +1,147 @@
+.file "significand.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00 Unwind support added
+// 5/31/00: Fixed bug when x a double-extended denormal
+//
+// API
+//==============================================================
+// double significand(double x)
+//
+// Overview of operation
+//==============================================================
+// If x = sig * 2**n with 1 <= sig < 2
+// significand returns sig
+//
+// predicate registers used:
+// p6, p7
+//
+// floating-point registers used:
+// f8, f9, f10
+
+#include "libm_support.h"
+
+.align 32
+.global significand#
+
+.section .text
+.proc significand#
+.align 32
+
+significand:
+// Input x arrives in f8 and the result is returned in f8.  fclass mask bits:
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11
+
+// f10 gets f8(sign) with f1(exp,significand)
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8,f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnorm f9 = f8 // f9 = normalized copy of x, used on the denormal path
+ nop.i 999 ;;
+}
+
+// Test for denormal input
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f8, 0x0b // 0x0b = unorm, either sign
+ nop.i 999 ;;
+}
+
+// p6 = TRUE ==> x is not (nan,inf,0)
+// return sign(f8) exp(f1) significand(f8)
+// else x is (nan,inf,0)
+// return sign(f8) exp(f8) significand(f8), normalized.
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p0,p6 = f8, 0xe7 // p6 is the "no match" predicate
+ nop.i 999 ;;
+}
+
+{ .mmb
+ nop.m 999
+ nop.m 999
+(p7) br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
+}
+
+{ .mfi
+ nop.m 999
+(p6) fmerge.se f8 = f10,f8 // sign and exponent from f10, significand from x
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fnorm.d f8 = f8 // deliver the result in double precision
+(p0) br.ret.sptk b0 ;;
+}
+
+L(SIGNIFICAND_DENORM):
+// Here if x denorm
+{ .mfi
+ nop.m 999
+(p0) fmerge.se f8 = f10,f9 // sign/exponent from f10, significand of fnorm(x)
+ nop.i 999 ;;
+}
+
+// Check if fnorm(x) still denormal, means x double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x0b // p7 now means: f9 is still unnormal
+ nop.i 999 ;;
+}
+
+// This will be the final result unless x double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fnorm.d f8 = f8
+ nop.i 999 ;;
+}
+
+// If x double-extended denorm, then significand ok, but must merge in
+// correct signexp
+{ .mfi
+ nop.m 999
+(p7) fmerge.se f8 = f10,f8
+ nop.i 999 ;;
+}
+
+// Final normalization if x double-extended denorm
+{ .mfb
+ nop.m 999
+(p7) fnorm.d f8 = f8
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp significand
+ASM_SIZE_DIRECTIVE(significand)
diff --git a/sysdeps/ia64/fpu/s_significandf.S b/sysdeps/ia64/fpu/s_significandf.S
new file mode 100644
index 0000000..bdabe34
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_significandf.S
@@ -0,0 +1,146 @@
+.file "significandf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 2/03/00: Modified to improve speed
+// 5/31/00: Fixed bug when x a double-extended denormal
+//
+// API
+//==============================================================
+// float significandf(float x)
+// Overview of operation
+//==============================================================
+// If x = sig * 2**n with 1 <= sig < 2
+// significandf returns sig
+//
+// predicate registers used:
+// p6, p7
+//
+// floating-point registers used:
+// f8, f9, f10
+
+#include "libm_support.h"
+
+.align 32
+.global significandf#
+
+.section .text
+.proc significandf#
+.align 32
+
+significandf:
+// Input x arrives in f8 and the result is returned in f8.  fclass mask bits:
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11
+
+// f10 gets f8(sign) with f1(exp,significand)
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8,f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnorm f9 = f8 // f9 = normalized copy of x, used on the denormal path
+ nop.i 999 ;;
+}
+
+// Test for denormal input
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f8, 0x0b // 0x0b = unorm, either sign
+ nop.i 999 ;;
+}
+
+// p6 = TRUE ==> x is not (nan,inf,0)
+// return sign(f8) exp(f1) significand(f8)
+// else x is (nan,inf,0)
+// return sign(f8) exp(f8) significand(f8), normalized.
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p0,p6 = f8, 0xe7 // p6 is the "no match" predicate
+ nop.i 999 ;;
+}
+
+{ .mmb
+ nop.m 999
+ nop.m 999
+(p7) br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
+}
+
+{ .mfi
+ nop.m 999
+(p6) fmerge.se f8 = f10,f8 // sign and exponent from f10, significand from x
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fnorm.s f8 = f8 // deliver the result in single precision
+(p0) br.ret.sptk b0 ;;
+}
+
+L(SIGNIFICAND_DENORM):
+// Here if x denorm
+{ .mfi
+ nop.m 999
+(p0) fmerge.se f8 = f10,f9 // sign/exponent from f10, significand of fnorm(x)
+ nop.i 999 ;;
+}
+
+// Check if fnorm(x) still denormal, means x double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x0b // p7 now means: f9 is still unnormal
+ nop.i 999 ;;
+}
+
+// This will be the final result unless x double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fnorm.s f8 = f8
+ nop.i 999 ;;
+}
+
+// If x double-extended denorm, then significand ok, but must merge in
+// correct signexp
+{ .mfi
+ nop.m 999
+(p7) fmerge.se f8 = f10,f8
+ nop.i 999 ;;
+}
+
+// Final normalization if x double-extended denorm
+{ .mfb
+ nop.m 999
+(p7) fnorm.s f8 = f8
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp significandf
+ASM_SIZE_DIRECTIVE(significandf)
diff --git a/sysdeps/ia64/fpu/s_significandl.S b/sysdeps/ia64/fpu/s_significandl.S
new file mode 100644
index 0000000..5dcda0e
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_significandl.S
@@ -0,0 +1,147 @@
+.file "significandl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 2/03/00: Modified to improve speed
+// 5/31/00: Fixed bug when x a double-extended denormal
+//
+// API
+//==============================================================
+// long double significandl(long double x)
+//
+// Overview of operation
+//==============================================================
+// If x = sig * 2**n with 1 <= sig < 2
+// significandl returns sig
+//
+// predicate registers used:
+// p6, p7
+//
+// floating-point registers used:
+// f8, f9, f10
+
+#include "libm_support.h"
+
+.align 32
+.global significandl#
+
+.section .text
+.proc significandl#
+.align 32
+
+significandl:
+// Input x arrives in f8 and the result is returned in f8.  fclass mask bits:
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 1 11
+
+// f10 gets f8(sign) with f1(exp,significand)
+{ .mfi
+ nop.m 999
+(p0) fmerge.s f10 = f8,f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnorm f9 = f8 // f9 = normalized copy of x, used on the denormal path
+ nop.i 999 ;;
+}
+
+// Test for denormal input
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f8, 0x0b // 0x0b = unorm, either sign
+ nop.i 999 ;;
+}
+
+// p6 = TRUE ==> x is not (nan,inf,0)
+// return sign(f8) exp(f1) significand(f8)
+// else x is (nan,inf,0)
+// return sign(f8) exp(f8) significand(f8), normalized.
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p0,p6 = f8, 0xe7 // p6 is the "no match" predicate
+ nop.i 999 ;;
+}
+
+{ .mmb
+ nop.m 999
+ nop.m 999
+(p7) br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
+}
+
+{ .mfi
+ nop.m 999
+(p6) fmerge.se f8 = f10,f8 // sign and exponent from f10, significand from x
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p0) fnorm f8 = f8 // no precision completer in the long double variant
+(p0) br.ret.sptk b0 ;;
+}
+
+L(SIGNIFICAND_DENORM):
+// Here if x denorm
+{ .mfi
+ nop.m 999
+(p0) fmerge.se f8 = f10,f9 // sign/exponent from f10, significand of fnorm(x)
+ nop.i 999 ;;
+}
+
+// Check if fnorm(x) still denormal, means x double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fclass.m.unc p7,p0 = f9, 0x0b // p7 now means: f9 is still unnormal
+ nop.i 999 ;;
+}
+
+// This will be the final result unless x double-extended denormal
+{ .mfi
+ nop.m 999
+(p0) fnorm f8 = f8
+ nop.i 999 ;;
+}
+
+// If x double-extended denorm, then significand ok, but must merge in
+// correct signexp
+{ .mfi
+ nop.m 999
+(p7) fmerge.se f8 = f10,f8
+ nop.i 999 ;;
+}
+
+// Final normalization if x double-extended denorm
+{ .mfb
+ nop.m 999
+(p7) fnorm f8 = f8
+(p0) br.ret.sptk b0 ;;
+}
+
+.endp significandl
+ASM_SIZE_DIRECTIVE(significandl)
diff --git a/sysdeps/ia64/fpu/s_sin.c b/sysdeps/ia64/fpu/s_sin.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_sin.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/s_sincos.c b/sysdeps/ia64/fpu/s_sincos.c
new file mode 100644
index 0000000..1ddbc21
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_sincos.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+void
+__sincos (double x, double *s, double *c)
+{
+ const double sine = sin (x), cosine = cos (x); /* sin is evaluated first */
+ *s = sine; *c = cosine; /* store through S first, then through C */
+}
+weak_alias (__sincos, sincos)
diff --git a/sysdeps/ia64/fpu/s_sincosf.c b/sysdeps/ia64/fpu/s_sincosf.c
new file mode 100644
index 0000000..efd0fe3
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_sincosf.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+void
+__sincosf (float x, float *s, float *c)
+{
+ const float sine = sinf (x), cosine = cosf (x); /* sinf is evaluated first */
+ *s = sine; *c = cosine; /* store through S first, then through C */
+}
+weak_alias (__sincosf, sincosf)
diff --git a/sysdeps/ia64/fpu/s_sincosl.c b/sysdeps/ia64/fpu/s_sincosl.c
new file mode 100644
index 0000000..a835b77
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_sincosl.c
@@ -0,0 +1,9 @@
+#include <math.h>
+
+void
+__sincosl (long double x, long double *s, long double *c)
+{
+ const long double sine = sinl (x), cosine = cosl (x); /* sinl runs first */
+ *s = sine; *c = cosine; /* store through S first, then through C */
+}
+weak_alias (__sincosl, sincosl)
diff --git a/sysdeps/ia64/fpu/s_sinf.c b/sysdeps/ia64/fpu/s_sinf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_sinf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/s_sinl.c b/sysdeps/ia64/fpu/s_sinl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_sinl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/s_tan.S b/sysdeps/ia64/fpu/s_tan.S
new file mode 100644
index 0000000..3678a42
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_tan.S
@@ -0,0 +1,757 @@
+.file "tan.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00 Unwind support added
+// 12/27/00 Improved speed
+//
+// API
+//==============================================================
+// double tan( double x);
+//
+// Overview of operation
+//==============================================================
+// If the input value in radians is |x| >= 1.xxxxx 2^10 call the
+// older slower version.
+//
+// The new algorithm is used when |x| <= 1.xxxxx 2^9.
+//
+// Represent the input X as Nfloat * pi/2 + r
+// where r can be negative and |r| <= pi/4
+//
+// tan_W = x * 2/pi
+// Nfloat = round_int(tan_W)
+//
+// tan_r = x - Nfloat * (pi/2)_hi
+// tan_r = tan_r - Nfloat * (pi/2)_lo
+//
+// We have two paths: p8, when Nfloat is even, and p9, when Nfloat is odd.
+// p8: tan(X) = tan(r)
+// p9: tan(X) = -cot(r)
+//
+// Each is evaluated as a series. The p9 path requires 1/r.
+//
+// The coefficients used in the series are stored in a table as
+// are the pi constants.
+//
+// Registers used
+//==============================================================
+//
+// predicate registers used:
+// p6-10
+//
+// floating-point registers used:
+// f10-15, f32-105
+// f8, input
+//
+// general registers used
+// r14-18, r32-43
+//
+
+#include "libm_support.h"
+
+// Assembly macros
+//==============================================================
+TAN_INV_PI_BY_2_2TO64 = f10 // significand of 2/pi, set with setf.sig
+TAN_RSHF_2TO64 = f11 // 1.1000 * 2^(63+63+1)
+TAN_2TOM64 = f12 // 2^-64
+TAN_RSHF = f13 // 1.1000 * 2^63, the right shift constant
+TAN_W_2TO64_RSH = f14 // x * (scaled 2/pi) + TAN_RSHF_2TO64
+TAN_NFLOAT = f15 // x * 2/pi rounded to an integer value
+
+tan_Inv_Pi_by_2 = f32 // not referenced by the code in this file
+tan_Pi_by_2_hi = f33
+tan_Pi_by_2_lo = f34
+
+// Series coefficients used on the even-N path (p8, tan)
+tan_P0 = f35
+tan_P1 = f36
+tan_P2 = f37
+tan_P3 = f38
+tan_P4 = f39
+tan_P5 = f40
+tan_P6 = f41
+tan_P7 = f42
+tan_P8 = f43
+tan_P9 = f44
+tan_P10 = f45
+tan_P11 = f46
+tan_P12 = f47
+tan_P13 = f48
+tan_P14 = f49
+tan_P15 = f50
+// Series coefficients used on the odd-N path (p9, -cot)
+tan_Q0 = f51
+tan_Q1 = f52
+tan_Q2 = f53
+tan_Q3 = f54
+tan_Q4 = f55
+tan_Q5 = f56
+tan_Q6 = f57
+tan_Q7 = f58
+tan_Q8 = f59
+tan_Q9 = f60
+tan_Q10 = f61
+
+tan_r = f62
+tan_rsq = f63
+tan_rcube = f64
+// Intermediate terms of the P polynomial
+tan_v18 = f65
+tan_v16 = f66
+tan_v17 = f67
+tan_v12 = f68
+tan_v13 = f69
+tan_v7 = f70
+tan_v8 = f71
+tan_v4 = f72
+tan_v5 = f73
+tan_v15 = f74
+tan_v11 = f75
+tan_v14 = f76
+tan_v3 = f77
+tan_v6 = f78
+tan_v10 = f79
+tan_v2 = f80
+tan_v9 = f81
+tan_v1 = f82
+tan_int_Nfloat = f83 // not referenced by the code in this file
+tan_Nfloat = f84 // named only in comments; the code uses TAN_NFLOAT
+
+tan_NORM_f8 = f85
+tan_W = f86 // named only in comments; the code uses TAN_W_2TO64_RSH
+// Reciprocal approximation and refinement terms for 1/r
+tan_y0 = f87
+tan_d = f88
+tan_y1 = f89
+tan_dsq = f90
+tan_y2 = f91
+tan_d4 = f92
+tan_inv_r = f93
+// Intermediate terms of the Q polynomial
+tan_z1 = f94
+tan_z2 = f95
+tan_z3 = f96
+tan_z4 = f97
+tan_z5 = f98
+tan_z6 = f99
+tan_z7 = f100
+tan_z8 = f101
+tan_z9 = f102
+tan_z10 = f103
+tan_z11 = f104
+tan_z12 = f105
+
+
+/////////////////////////////////////////////////////////////
+// General registers
+tan_GR_sig_inv_pi_by_2 = r14
+tan_GR_rshf_2to64 = r15
+tan_GR_exp_2tom64 = r16
+tan_GR_n = r17
+tan_GR_rshf = r18
+
+tan_AD = r33
+tan_GR_10009 = r34
+tan_GR_17_ones = r35
+tan_GR_N_odd_even = r36
+tan_GR_N = r37 // not referenced by the code in this file (tan_GR_n is)
+tan_signexp = r38
+tan_exp = r39
+tan_ADQ = r40
+// Copies of ar.pfs, b0 and gp kept across the call to __libm_tan
+GR_SAVE_PFS = r41
+GR_SAVE_B0 = r42
+GR_SAVE_GP = r43
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+// Read sequentially through tan_AD: pi/2 hi with ldfe, then P pairs with ldfpd.
+double_tan_constants:
+ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
+// 2/pi is not stored here; the code builds it with movl 0xA2F9836E4E44152A
+ data8 0xC90FDAA22168C234, 0x00003FFF // pi/2 hi
+// The pairs below are stored in the order the code loads them.
+ data8 0xBEEA54580DDEA0E1 // P14
+ data8 0x3ED3021ACE749A59 // P15
+ data8 0xBEF312BD91DC8DA1 // P12
+ data8 0x3EFAE9AFC14C5119 // P13
+ data8 0x3F2F342BF411E769 // P8
+ data8 0x3F1A60FC9F3B0227 // P9
+ data8 0x3EFF246E78E5E45B // P10
+ data8 0x3F01D9D2E782875C // P11
+ data8 0x3F8226E34C4499B6 // P4
+ data8 0x3F6D6D3F12C236AC // P5
+ data8 0x3F57DA1146DCFD8B // P6
+ data8 0x3F43576410FE3D75 // P7
+ data8 0x3FD5555555555555 // P0
+ data8 0x3FC11111111111C2 // P1
+ data8 0x3FABA1BA1BA0E850 // P2
+ data8 0x3F9664F4886725A7 // P3
+ASM_SIZE_DIRECTIVE(double_tan_constants)
+// Read through tan_ADQ: pi/2 lo (ldfe), Q pairs (ldfpd), then Q10 (ldfd).
+double_Q_tan_constants:
+ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)
+ data8 0xC4C6628B80DC1CD1, 0x00003FBF // pi/2 lo
+ data8 0x3E223A73BA576E48 // Q8
+ data8 0x3DF54AD8D1F2CA43 // Q9
+ data8 0x3EF66A8EE529A6AA // Q4
+ data8 0x3EC2281050410EE6 // Q5
+ data8 0x3E8D6BB992CC3CF5 // Q6
+ data8 0x3E57F88DE34832E4 // Q7
+ data8 0x3FD5555555555555 // Q0
+ data8 0x3F96C16C16C16DB8 // Q1
+ data8 0x3F61566ABBFFB489 // Q2
+ data8 0x3F2BBD77945C1733 // Q3
+ data8 0x3D927FB33E2B0E04 // Q10
+ASM_SIZE_DIRECTIVE(double_Q_tan_constants)
+
+
+
+.align 32
+.global tan#
+#ifdef _LIBC
+.global __tan#
+#endif
+
+////////////////////////////////////////////////////////
+
+
+
+.section .text
+.proc tan#
+#ifdef _LIBC
+.proc __tan#
+#endif
+.align 32
+tan:
+#ifdef _LIBC
+__tan:
+#endif
+// The initial fnorm will take any unmasked faults and
+// normalize any single/double unorms
+// Input x arrives in f8 and the result is returned in f8.
+{ .mlx
+ alloc r32=ar.pfs,1,11,0,0 // 1 input + 11 locals: r32-r43
+ movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
+}
+{ .mlx
+ addl tan_AD = @ltoff(double_tan_constants), gp
+ movl tan_GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+63+1)
+}
+;;
+
+{ .mfi
+ ld8 tan_AD = [tan_AD]
+ fnorm tan_NORM_f8 = f8
+ mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
+}
+{ .mlx
+ nop.m 999
+ movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+}
+;;
+
+
+// Form two constants we need
+// 2/pi * 2^1 * 2^63, scaled by 2^64 since we just loaded the significand
+// 1.1000...000 * 2^(63+63+1) to right shift int(W) into the significand
+{ .mmi
+ setf.sig TAN_INV_PI_BY_2_2TO64 = tan_GR_sig_inv_pi_by_2
+ setf.d TAN_RSHF_2TO64 = tan_GR_rshf_2to64
+ mov tan_GR_17_ones = 0x1ffff ;;
+}
+
+
+// Form another constant
+// 2^-64 for scaling Nfloat
+// Also point tan_ADQ at the Q table and test for x=0
+{ .mmf
+ setf.exp TAN_2TOM64 = tan_GR_exp_2tom64
+ adds tan_ADQ = double_Q_tan_constants - double_tan_constants, tan_AD
+ fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
+}
+;;
+
+
+// Form the last constant
+// 1.1000...000 * 2^63, the right shift constant
+// Start reading the P table: pi/2 hi comes first
+{ .mmf
+ setf.d TAN_RSHF = tan_GR_rshf
+ ldfe tan_Pi_by_2_hi = [tan_AD],16
+ fclass.m.unc p7,p0 = f8, 0x23 // Test for x=inf
+}
+;;
+
+{ .mfb
+ ldfe tan_Pi_by_2_lo = [tan_ADQ],16
+ fclass.m.unc p8,p0 = f8, 0xc3 // Test for x=nan
+(p6) br.ret.spnt b0 ;; // Exit for x=0
+}
+
+{ .mfi
+ ldfpd tan_P14,tan_P15 = [tan_AD],16
+(p7) frcpa.s0 f8,p9=f0,f0 // Set qnan indef if x=inf
+ mov tan_GR_10009 = 0x10009
+}
+{ .mib
+ ldfpd tan_Q8,tan_Q9 = [tan_ADQ],16
+ nop.i 999
+(p7) br.ret.spnt b0 ;; // Exit for x=inf
+}
+
+{ .mfi
+ ldfpd tan_P12,tan_P13 = [tan_AD],16
+(p8) fma.d f8=f8,f1,f8 // Set qnan if x=nan
+ nop.i 999
+}
+{ .mib
+ ldfpd tan_Q4,tan_Q5 = [tan_ADQ],16
+ nop.i 999
+(p8) br.ret.spnt b0 ;; // Exit for x=nan
+}
+
+{ .mmi
+ getf.exp tan_signexp = tan_NORM_f8
+ ldfpd tan_P8,tan_P9 = [tan_AD],16
+ nop.i 999 ;;
+}
+
+// Multiply x by scaled 2/pi and add large const to shift integer part of W to
+// rightmost bits of significand
+{ .mfi
+ ldfpd tan_Q6,tan_Q7 = [tan_ADQ],16
+ fma.s1 TAN_W_2TO64_RSH = tan_NORM_f8,TAN_INV_PI_BY_2_2TO64,TAN_RSHF_2TO64
+ nop.i 999 ;;
+}
+
+{ .mmi
+ ldfpd tan_P10,tan_P11 = [tan_AD],16
+ nop.m 999
+ and tan_exp = tan_GR_17_ones, tan_signexp ;; // keep the low 17 bits only
+}
+
+
+// p7 is true if we must call DBX TAN
+// p7 is true if f8 exp is >= 0x10009, i.e. |x| >= 2^10 (which includes
+// all ones NAN or inf)
+{ .mmi
+ ldfpd tan_Q0,tan_Q1 = [tan_ADQ],16
+ cmp.ge.unc p7,p0 = tan_exp,tan_GR_10009
+ nop.i 999 ;;
+}
+
+
+{ .mmb
+ ldfpd tan_P4,tan_P5 = [tan_AD],16
+ nop.m 999
+(p7) br.cond.spnt L(TAN_DBX) ;;
+}
+
+
+{ .mmi
+ ldfpd tan_Q2,tan_Q3 = [tan_ADQ],16
+ nop.m 999
+ nop.i 999 ;;
+}
+
+
+
+// TAN_NFLOAT = Round_Int_Nearest(tan_W)
+{ .mfi
+ ldfpd tan_P6,tan_P7 = [tan_AD],16
+ fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfd tan_Q10 = [tan_ADQ]
+ nop.f 999
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfpd tan_P0,tan_P1 = [tan_AD],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ getf.sig tan_GR_n = TAN_W_2TO64_RSH // significand carries the integer N
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// tan_r = -TAN_NFLOAT * tan_Pi_by_2_hi + x
+{ .mfi
+ ldfpd tan_P2,tan_P3 = [tan_AD]
+ fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_hi, tan_NORM_f8
+ nop.i 999 ;;
+}
+
+
+// p8 ==> even
+// p9 ==> odd
+{ .mmi
+ and tan_GR_N_odd_even = 0x1, tan_GR_n ;;
+ nop.m 999
+ cmp.eq.unc p8,p9 = tan_GR_N_odd_even, r0 ;;
+}
+
+
+// tan_r = tan_r - TAN_NFLOAT * tan_Pi_by_2_lo
+{ .mfi
+ nop.m 999
+ fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_lo, tan_r
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 tan_rsq = tan_r, tan_r, f0
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) frcpa.s1 tan_y0, p10 = f1,tan_r // y0 = first approximation of 1/r
+ nop.i 999 ;;
+}
+
+// Pairwise terms of both series: v* on the p8 path, z* on the p9 path
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v18 = tan_rsq, tan_P15, tan_P14 // P14 + P15*rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v4 = tan_rsq, tan_P1, tan_P0 // P0 + P1*rsq
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v16 = tan_rsq, tan_P13, tan_P12 // P12 + P13*rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v17 = tan_rsq, tan_rsq, f0 // rsq^2
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v12 = tan_rsq, tan_P9, tan_P8 // P8 + P9*rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v13 = tan_rsq, tan_P11, tan_P10 // P10 + P11*rsq
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v7 = tan_rsq, tan_P5, tan_P4 // P4 + P5*rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v8 = tan_rsq, tan_P7, tan_P6 // P6 + P7*rsq
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fnma.s1 tan_d = tan_r, tan_y0, f1 // d = 1 - r*y0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v5 = tan_rsq, tan_P3, tan_P2 // P2 + P3*rsq
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z11 = tan_rsq, tan_Q9, tan_Q8 // Q8 + Q9*rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z12 = tan_rsq, tan_rsq, f0 // rsq^2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v15 = tan_v17, tan_v18, tan_v16 // terms P12..P15
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z7 = tan_rsq, tan_Q5, tan_Q4 // Q4 + Q5*rsq
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v11 = tan_v17, tan_v13, tan_v12 // terms P8..P11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z8 = tan_rsq, tan_Q7, tan_Q6 // Q6 + Q7*rsq
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v14 = tan_v17, tan_v17, f0 // rsq^4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z3 = tan_rsq, tan_Q1, tan_Q0 // Q0 + Q1*rsq
+ nop.i 999 ;;
+}
+
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v3 = tan_v17, tan_v5, tan_v4 // terms P0..P3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v6 = tan_v17, tan_v8, tan_v7 // terms P4..P7
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_y1 = tan_y0, tan_d, tan_y0 // y1 = y0*(1 + d)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_dsq = tan_d, tan_d, f0 // d^2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z10 = tan_z12, tan_Q10, tan_z11 // terms Q8..Q10
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z9 = tan_z12, tan_z12,f0 // rsq^4
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z4 = tan_rsq, tan_Q3, tan_Q2 // Q2 + Q3*rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z6 = tan_z12, tan_z8, tan_z7 // terms Q4..Q7
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v10 = tan_v14, tan_v15, tan_v11 // terms P8..P15
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_y2 = tan_y1, tan_d, tan_y0 // y2 = y0*(1 + d + d^2)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_d4 = tan_dsq, tan_dsq, tan_d // d4 = d + d^4
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v2 = tan_v14, tan_v6, tan_v3 // terms P0..P7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v9 = tan_v14, tan_v14, f0 // rsq^8
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z2 = tan_z12, tan_z4, tan_z3 // terms Q0..Q3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z5 = tan_z9, tan_z10, tan_z6 // terms Q4..Q10
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_inv_r = tan_d4, tan_y2, tan_y0 // refined 1/r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_rcube = tan_rsq, tan_r, f0
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v1 = tan_v9, tan_v10, tan_v2 // full P polynomial in rsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z1 = tan_z9, tan_z5, tan_z2 // full Q polynomial in rsq
+ nop.i 999 ;;
+}
+
+
+// Even N (p8): result = r + rcube*v1.  Odd N (p9): result = r*z1 - 1/r.
+{ .mfi
+ nop.m 999
+(p8) fma.d.s0 f8 = tan_v1, tan_rcube, tan_r
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fms.d.s0 f8 = tan_r, tan_z1, tan_inv_r
+ br.ret.sptk b0 ;;
+}
+.endp tan#
+ASM_SIZE_DIRECTIVE(tan)
+
+
+.proc __libm_callout
+__libm_callout:
+L(TAN_DBX):
+.prologue
+// Reached from tan when the exponent test selects the __libm_tan path.
+{ .mfi
+ nop.m 0
+ fmerge.s f9 = f0,f0 // f9 = +0.0; presumably an argument of __libm_tan - TODO confirm
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs
+}
+;;
+
+{ .mfi
+ mov GR_SAVE_GP=gp
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0
+}
+
+.body
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.call.sptk.many b0=__libm_tan# ;;
+}
+
+// After the call: restore gp and b0, and round the value in f8 to double.
+{ .mfi
+ mov gp = GR_SAVE_GP
+ fnorm.d f8 = f8
+ mov b0 = GR_SAVE_B0
+}
+;;
+
+// Restore ar.pfs and return to the caller of tan.
+{ .mib
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+;;
+}
+
+
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+
+.type __libm_tan#,@function
+.global __libm_tan#
diff --git a/sysdeps/ia64/fpu/s_tanf.S b/sysdeps/ia64/fpu/s_tanf.S
new file mode 100644
index 0000000..b4493c1
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_tanf.S
@@ -0,0 +1,757 @@
+.file "tanf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00 Unwind support added
+// 12/27/00 Improved speed
+//
+// API
+//==============================================================
+// float tanf( float x);
+//
+// Overview of operation
+//==============================================================
+// If the input value in radians is |x| >= 1.xxxxx 2^10 call the
+// older slower version.
+//
+// The new algorithm is used when |x| <= 1.xxxxx 2^9.
+//
+// Represent the input X as Nfloat * pi/2 + r
+// where r can be negative and |r| <= pi/4
+//
+// tan_W = x * 2/pi
+// Nfloat = round_int(tan_W)
+//
+// tan_r = x - Nfloat * (pi/2)_hi
+// tan_r = tan_r - Nfloat * (pi/2)_lo
+//
+// We have two paths: p8, when Nfloat is even, and p9, when Nfloat is odd.
+// p8: tan(X) = tan(r)
+// p9: tan(X) = -cot(r)
+//
+// Each is evaluated as a series. The p9 path requires 1/r.
+//
+// The coefficients used in the series are stored in a table as
+// are the pi constants.
+//
+// Registers used
+//==============================================================
+//
+// predicate registers used:
+// p6-10
+//
+// floating-point registers used:
+// f10-15, f32-105
+// f8, input
+//
+// general registers used
+// r14-18, r32-43
+//
+
+#include "libm_support.h"
+
+// Assembly macros: symbolic names for the registers used by tanf
+//==============================================================
+TAN_INV_PI_BY_2_2TO64 = f10 // 2/pi significand loaded with setf.sig (value scaled by 2^64)
+TAN_RSHF_2TO64 = f11 // 1.1000 * 2^(63+64) shifter added to the scaled product
+TAN_2TOM64 = f12 // 2^-64, undoes the scaling
+TAN_RSHF = f13 // 1.1000 * 2^63 right-shift constant
+TAN_W_2TO64_RSH = f14 // x * TAN_INV_PI_BY_2_2TO64 + TAN_RSHF_2TO64
+TAN_NFLOAT = f15 // N as a floating-point value
+
+tan_Inv_Pi_by_2 = f32 // not referenced by any instruction in this file
+tan_Pi_by_2_hi = f33 // high part of pi/2
+tan_Pi_by_2_lo = f34 // low part of pi/2
+
+
+tan_P0 = f35 // P0..P15: coefficients of the even-N (tan) series
+tan_P1 = f36
+tan_P2 = f37
+tan_P3 = f38
+tan_P4 = f39
+tan_P5 = f40
+tan_P6 = f41
+tan_P7 = f42
+tan_P8 = f43
+tan_P9 = f44
+tan_P10 = f45
+tan_P11 = f46
+tan_P12 = f47
+tan_P13 = f48
+tan_P14 = f49
+tan_P15 = f50
+
+tan_Q0 = f51 // Q0..Q10: coefficients of the odd-N (-cot) series
+tan_Q1 = f52
+tan_Q2 = f53
+tan_Q3 = f54
+tan_Q4 = f55
+tan_Q5 = f56
+tan_Q6 = f57
+tan_Q7 = f58
+tan_Q8 = f59
+tan_Q9 = f60
+tan_Q10 = f61
+
+tan_r = f62 // reduced argument r
+tan_rsq = f63 // r^2
+tan_rcube = f64 // r^3
+
+tan_v18 = f65 // v*: partial sums and powers for the P polynomial
+tan_v16 = f66
+tan_v17 = f67
+tan_v12 = f68
+tan_v13 = f69
+tan_v7 = f70
+tan_v8 = f71
+tan_v4 = f72
+tan_v5 = f73
+tan_v15 = f74
+tan_v11 = f75
+tan_v14 = f76
+tan_v3 = f77
+tan_v6 = f78
+tan_v10 = f79
+tan_v2 = f80
+tan_v9 = f81
+tan_v1 = f82
+tan_int_Nfloat = f83 // not referenced by any instruction in this file
+tan_Nfloat = f84 // not referenced by any instruction in this file (TAN_NFLOAT is used instead)
+
+tan_NORM_f8 = f85 // normalized copy of the input x
+tan_W = f86 // not referenced by any instruction in this file
+
+tan_y0 = f87 // frcpa approximation of 1/r
+tan_d = f88 // 1 - r*y0
+tan_y1 = f89
+tan_dsq = f90
+tan_y2 = f91
+tan_d4 = f92
+tan_inv_r = f93 // refined 1/r
+
+tan_z1 = f94 // z*: partial sums and powers for the Q polynomial
+tan_z2 = f95
+tan_z3 = f96
+tan_z4 = f97
+tan_z5 = f98
+tan_z6 = f99
+tan_z7 = f100
+tan_z8 = f101
+tan_z9 = f102
+tan_z10 = f103
+tan_z11 = f104
+tan_z12 = f105
+
+
+// General registers ////////////////////////////////////////
+
+tan_GR_sig_inv_pi_by_2 = r14 // bit pattern 0xA2F9836E4E44152A (significand of 2/pi)
+tan_GR_rshf_2to64 = r15 // bit pattern of the 2^(63+64) shifter
+tan_GR_exp_2tom64 = r16 // biased exponent of 2^-64
+tan_GR_n = r17 // significand bits of TAN_W_2TO64_RSH; low bit gives the parity of N
+tan_GR_rshf = r18 // bit pattern of the 2^63 shifter
+
+tan_AD = r33 // walks double_tan_constants
+tan_GR_10009 = r34 // exponent threshold 0x10009
+tan_GR_17_ones = r35 // mask 0x1ffff
+tan_GR_N_odd_even = r36 // N & 1
+tan_GR_N = r37 // not referenced by any instruction in this file
+tan_signexp = r38 // sign and exponent field of x
+tan_exp = r39 // exponent field of x with the sign masked off
+tan_ADQ = r40 // walks double_Q_tan_constants
+
+GR_SAVE_PFS = r41 // saved ar.pfs in __libm_callout
+GR_SAVE_B0 = r42 // saved b0 in __libm_callout
+GR_SAVE_GP = r43 // saved gp in __libm_callout
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+
+double_tan_constants: // pi/2 high part, then the P coefficients in the order tanf loads them (16 bytes per load)
+ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
+// data8 0xA2F9836E4E44152A, 0x00003FFE // 2/pi (left out of the table: tanf loads this significand with movl)
+ data8 0xC90FDAA22168C234, 0x00003FFF // pi/2 hi
+
+ data8 0xBEEA54580DDEA0E1 // P14
+ data8 0x3ED3021ACE749A59 // P15
+ data8 0xBEF312BD91DC8DA1 // P12
+ data8 0x3EFAE9AFC14C5119 // P13
+ data8 0x3F2F342BF411E769 // P8
+ data8 0x3F1A60FC9F3B0227 // P9
+ data8 0x3EFF246E78E5E45B // P10
+ data8 0x3F01D9D2E782875C // P11
+ data8 0x3F8226E34C4499B6 // P4
+ data8 0x3F6D6D3F12C236AC // P5
+ data8 0x3F57DA1146DCFD8B // P6
+ data8 0x3F43576410FE3D75 // P7
+ data8 0x3FD5555555555555 // P0
+ data8 0x3FC11111111111C2 // P1
+ data8 0x3FABA1BA1BA0E850 // P2
+ data8 0x3F9664F4886725A7 // P3
+ASM_SIZE_DIRECTIVE(double_tan_constants)
+
+double_Q_tan_constants: // pi/2 low part, then the Q coefficients in the order tanf loads them
+ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)
+ data8 0xC4C6628B80DC1CD1, 0x00003FBF // pi/2 lo
+ data8 0x3E223A73BA576E48 // Q8
+ data8 0x3DF54AD8D1F2CA43 // Q9
+ data8 0x3EF66A8EE529A6AA // Q4
+ data8 0x3EC2281050410EE6 // Q5
+ data8 0x3E8D6BB992CC3CF5 // Q6
+ data8 0x3E57F88DE34832E4 // Q7
+ data8 0x3FD5555555555555 // Q0
+ data8 0x3F96C16C16C16DB8 // Q1
+ data8 0x3F61566ABBFFB489 // Q2
+ data8 0x3F2BBD77945C1733 // Q3
+ data8 0x3D927FB33E2B0E04 // Q10 (loaded last, on its own, with ldfd)
+ASM_SIZE_DIRECTIVE(double_Q_tan_constants)
+
+
+
+.align 32
+.global tanf#
+#ifdef _LIBC
+.global __tanf#
+#endif
+
+// tanf: single-precision tangent; x is read from f8 and the result is returned in f8
+
+
+
+.section .text
+.proc tanf#
+#ifdef _LIBC
+.proc __tanf#
+#endif
+.align 32
+tanf:
+#ifdef _LIBC
+__tanf:
+#endif
+// The initial fnorm will take any unmasked faults and
+// normalize any single/double unorms
+
+{ .mlx
+ alloc r32=ar.pfs,1,11,0,0 // 1 input + 11 locals (r32-r43), no outputs
+ movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
+}
+{ .mlx
+ addl tan_AD = @ltoff(double_tan_constants), gp // linkage-table slot holding the table address
+ movl tan_GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+63+1)
+}
+;;
+
+{ .mfi
+ ld8 tan_AD = [tan_AD] // tan_AD -> double_tan_constants
+ fnorm tan_NORM_f8 = f8
+ mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
+}
+{ .mlx
+ nop.m 999
+ movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+}
+;;
+
+
+// Form two constants we need
+// 2/pi * 2^1 * 2^63, scaled by 2^64 since we just loaded the significand
+// 1.1000...000 * 2^(63+63+1) to right shift int(W) into the significand
+{ .mmi
+ setf.sig TAN_INV_PI_BY_2_2TO64 = tan_GR_sig_inv_pi_by_2
+ setf.d TAN_RSHF_2TO64 = tan_GR_rshf_2to64
+ mov tan_GR_17_ones = 0x1ffff ;;
+}
+
+
+// Form another constant
+// 2^-64 for scaling Nfloat
+// Also point tan_ADQ at the Q table and test the input for zero
+{ .mmf
+ setf.exp TAN_2TOM64 = tan_GR_exp_2tom64
+ adds tan_ADQ = double_Q_tan_constants - double_tan_constants, tan_AD
+ fclass.m.unc p6,p0 = f8, 0x07 // Test for x=0
+}
+;;
+
+
+// Form another constant
+// 1.1000...000 * 2^63, the right shift constant
+// Also start loading the table (pi/2 hi) and test the input for infinity
+{ .mmf
+ setf.d TAN_RSHF = tan_GR_rshf
+ ldfe tan_Pi_by_2_hi = [tan_AD],16
+ fclass.m.unc p7,p0 = f8, 0x23 // Test for x=inf
+}
+;;
+
+{ .mfb
+ ldfe tan_Pi_by_2_lo = [tan_ADQ],16
+ fclass.m.unc p8,p0 = f8, 0xc3 // Test for x=nan
+(p6) br.ret.spnt b0 ;; // Exit for x=0
+}
+
+{ .mfi
+ ldfpd tan_P14,tan_P15 = [tan_AD],16
+(p7) frcpa.s0 f8,p9=f0,f0 // Set qnan indef if x=inf
+ mov tan_GR_10009 = 0x10009
+}
+{ .mib
+ ldfpd tan_Q8,tan_Q9 = [tan_ADQ],16
+ nop.i 999
+(p7) br.ret.spnt b0 ;; // Exit for x=inf
+}
+
+{ .mfi
+ ldfpd tan_P12,tan_P13 = [tan_AD],16
+(p8) fma.s f8=f8,f1,f8 // Set qnan if x=nan
+ nop.i 999
+}
+{ .mib
+ ldfpd tan_Q4,tan_Q5 = [tan_ADQ],16
+ nop.i 999
+(p8) br.ret.spnt b0 ;; // Exit for x=nan
+}
+
+{ .mmi
+ getf.exp tan_signexp = tan_NORM_f8 // sign and exponent field of normalized x
+ ldfpd tan_P8,tan_P9 = [tan_AD],16
+ nop.i 999 ;;
+}
+
+// Multiply x by scaled 2/pi and add large const to shift integer part of W to
+// rightmost bits of significand
+{ .mfi
+ ldfpd tan_Q6,tan_Q7 = [tan_ADQ],16
+ fma.s1 TAN_W_2TO64_RSH = tan_NORM_f8,TAN_INV_PI_BY_2_2TO64,TAN_RSHF_2TO64
+ nop.i 999 ;;
+}
+
+{ .mmi
+ ldfpd tan_P10,tan_P11 = [tan_AD],16
+ nop.m 999
+ and tan_exp = tan_GR_17_ones, tan_signexp ;; // mask off the sign, keep the 17-bit exponent
+}
+
+
+// p7 is true if we must call DBX TAN
+// p7 is true if f8 exp is >= 0x10009, i.e. |x| >= 2^10 (an all-ones
+// exponent, NAN or inf, would also satisfy this test)
+{ .mmi
+ ldfpd tan_Q0,tan_Q1 = [tan_ADQ],16
+ cmp.ge.unc p7,p0 = tan_exp,tan_GR_10009
+ nop.i 999 ;;
+}
+
+
+{ .mmb
+ ldfpd tan_P4,tan_P5 = [tan_AD],16
+ nop.m 999
+(p7) br.cond.spnt L(TAN_DBX) ;; // large |x|: hand over to the __libm_tan wrapper
+}
+
+
+{ .mmi
+ ldfpd tan_Q2,tan_Q3 = [tan_ADQ],16
+ nop.m 999
+ nop.i 999 ;;
+}
+
+
+
+// TAN_NFLOAT = Round_Int_Nearest(tan_W)
+{ .mfi
+ ldfpd tan_P6,tan_P7 = [tan_AD],16
+ fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfd tan_Q10 = [tan_ADQ]
+ nop.f 999
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfpd tan_P0,tan_P1 = [tan_AD],16
+ nop.f 999
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ getf.sig tan_GR_n = TAN_W_2TO64_RSH // integer part of W sits in the low significand bits
+ nop.f 999
+ nop.i 999 ;;
+}
+
+// tan_r = -tan_Nfloat * tan_Pi_by_2_hi + x
+{ .mfi
+ ldfpd tan_P2,tan_P3 = [tan_AD]
+ fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_hi, tan_NORM_f8
+ nop.i 999 ;;
+}
+
+
+// p8 ==> even
+// p9 ==> odd
+{ .mmi
+ and tan_GR_N_odd_even = 0x1, tan_GR_n ;;
+ nop.m 999
+ cmp.eq.unc p8,p9 = tan_GR_N_odd_even, r0 ;;
+}
+
+
+// tan_r = tan_r -tan_Nfloat * tan_Pi_by_2_lo
+{ .mfi
+ nop.m 999
+ fnma.s1 tan_r = TAN_NFLOAT, tan_Pi_by_2_lo, tan_r
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 tan_rsq = tan_r, tan_r, f0 // rsq = r^2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) frcpa.s1 tan_y0, p10 = f1,tan_r // y0 ~ 1/r, starting point for the odd-N path
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v18 = tan_rsq, tan_P15, tan_P14
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v4 = tan_rsq, tan_P1, tan_P0
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v16 = tan_rsq, tan_P13, tan_P12
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v17 = tan_rsq, tan_rsq, f0 // v17 = r^4
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v12 = tan_rsq, tan_P9, tan_P8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v13 = tan_rsq, tan_P11, tan_P10
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v7 = tan_rsq, tan_P5, tan_P4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v8 = tan_rsq, tan_P7, tan_P6
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fnma.s1 tan_d = tan_r, tan_y0, f1 // d = 1 - r*y0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v5 = tan_rsq, tan_P3, tan_P2
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z11 = tan_rsq, tan_Q9, tan_Q8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z12 = tan_rsq, tan_rsq, f0 // z12 = r^4
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v15 = tan_v17, tan_v18, tan_v16
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z7 = tan_rsq, tan_Q5, tan_Q4
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v11 = tan_v17, tan_v13, tan_v12
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z8 = tan_rsq, tan_Q7, tan_Q6
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v14 = tan_v17, tan_v17, f0 // v14 = r^8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z3 = tan_rsq, tan_Q1, tan_Q0
+ nop.i 999 ;;
+}
+
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v3 = tan_v17, tan_v5, tan_v4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v6 = tan_v17, tan_v8, tan_v7
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_y1 = tan_y0, tan_d, tan_y0 // y1 = y0*(1 + d)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_dsq = tan_d, tan_d, f0 // dsq = d^2
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z10 = tan_z12, tan_Q10, tan_z11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z9 = tan_z12, tan_z12,f0 // z9 = r^8
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z4 = tan_rsq, tan_Q3, tan_Q2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z6 = tan_z12, tan_z8, tan_z7
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v10 = tan_v14, tan_v15, tan_v11
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_y2 = tan_y1, tan_d, tan_y0 // y2 = y0*(1 + d + d^2)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_d4 = tan_dsq, tan_dsq, tan_d // d4 = d + d^4
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v2 = tan_v14, tan_v6, tan_v3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v9 = tan_v14, tan_v14, f0 // v9 = r^16
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z2 = tan_z12, tan_z4, tan_z3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z5 = tan_z9, tan_z10, tan_z6
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_inv_r = tan_d4, tan_y2, tan_y0 // refined 1/r = y0 + y2*d4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_rcube = tan_rsq, tan_r, f0 // rcube = r^3
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 tan_v1 = tan_v9, tan_v10, tan_v2 // v1 = P0 + P1*r^2 + ... + P15*r^30
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 tan_z1 = tan_z9, tan_z5, tan_z2 // z1 = Q0 + Q1*r^2 + ... + Q10*r^20
+ nop.i 999 ;;
+}
+
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s.s0 f8 = tan_v1, tan_rcube, tan_r // N even: result = r + r^3*v1, rounded to single
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fms.s.s0 f8 = tan_r, tan_z1, tan_inv_r // N odd: result = r*z1 - 1/r, rounded to single
+ br.ret.sptk b0 ;;
+}
+.endp tanf#
+ASM_SIZE_DIRECTIVE(tanf#)
+
+
+.proc __libm_callout // Large-argument path of tanf: saves ar.pfs/gp/b0, calls __libm_tan, rounds its result to single
+__libm_callout:
+L(TAN_DBX):
+.prologue
+
+{ .mfi
+ nop.m 0
+ fmerge.s f9 = f0,f0 // f9 = +0.0; presumably a second argument expected by __libm_tan -- TODO confirm against its definition
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // keep ar.pfs across the call
+}
+;;
+
+{ .mfi
+ mov GR_SAVE_GP=gp // keep gp across the call
+ nop.f 0
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // keep our return address; br.call below overwrites b0
+}
+
+.body
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.call.sptk.many b0=__libm_tan# ;; // argument and result both travel in f8
+}
+
+
+{ .mfi
+ mov gp = GR_SAVE_GP
+ fnorm.s f8 = f8 // round the callee's result to single precision
+ mov b0 = GR_SAVE_B0
+}
+;;
+
+
+{ .mib
+ nop.m 999
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0 // return to the caller of tanf
+;;
+}
+
+
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+
+.type __libm_tan#,@function
+.global __libm_tan#
diff --git a/sysdeps/ia64/fpu/s_tanl.S b/sysdeps/ia64/fpu/s_tanl.S
new file mode 100644
index 0000000..d7cc3ee
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_tanl.S
@@ -0,0 +1,3057 @@
+.file "tanl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+// *********************************************************************
+//
+// History:
+//
+// 2/02/2000 (hand-optimized)
+// 4/04/00 Unwind support added
+// 12/28/00 Fixed false invalid flags
+//
+// *********************************************************************
+//
+// Function: tanl(x) = tangent(x), for double-extended precision x values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9-f15
+// f32-f112
+//
+// General Purpose Registers:
+// r32-r48
+// r49-r50 (Used to pass arguments to pi_by_2 reduce routine)
+//
+// Predicate Registers: p6-p15
+//
+// *********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions do not occur
+// Underflow exceptions raised when appropriate for tan
+// (No specialized error handling for this routine)
+// Inexact raised when appropriate by algorithm
+//
+// tan(SNaN) = QNaN
+// tan(QNaN) = QNaN
+// tan(inf) = QNaN
+// tan(+/-0) = +/-0
+//
+// *********************************************************************
+//
+// Mathematical Description
+//
+// We consider the computation of FPTANL of Arg. Now, given
+//
+// Arg = N pi/2 + alpha, |alpha| <= pi/4,
+//
+// basic mathematical relationship shows that
+//
+// tan( Arg ) = tan( alpha ) if N is even;
+// = -cot( alpha ) otherwise.
+//
+// The value of alpha is obtained by argument reduction and
+// represented by two working precision numbers r and c where
+//
+// alpha = r + c accurately.
+//
+// The reduction method is described in a previous write up.
+// The argument reduction scheme identifies 4 cases. For Cases 2
+// and 4, because |alpha| is small, tan(r+c) and -cot(r+c) can be
+// computed very easily by 2 or 3 terms of the Taylor series
+// expansion as follows:
+//
+// Case 2:
+// -------
+//
+// tan(r + c) = r + c + r^3/3 ...accurately
+// -cot(r + c) = -1/(r+c) + r/3 ...accurately
+//
+// Case 4:
+// -------
+//
+// tan(r + c) = r + c + r^3/3 + 2r^5/15 ...accurately
+// -cot(r + c) = -1/(r+c) + r/3 + r^3/45 ...accurately
+//
+//
+// The only cases left are Cases 1 and 3 of the argument reduction
+// procedure. These two cases will be merged since after the
+// argument is reduced in either cases, we have the reduced argument
+// represented as r + c and that the magnitude |r + c| is not small
+// enough to allow the usage of a very short approximation.
+//
+// The greatest challenge of this task is that the second terms of
+// the Taylor series for tan(r) and -cot(r)
+//
+// r + r^3/3 + 2 r^5/15 + ...
+//
+// and
+//
+// -1/r + r/3 + r^3/45 + ...
+//
+// are not very small when |r| is close to pi/4 and the rounding
+// errors will be a concern if simple polynomial accumulation is
+// used. When |r| < 2^(-2), however, the second terms will be small
+// enough (5 bits or so of right shift) that a normal Horner
+// recurrence suffices. Hence there are two cases that we consider
+// in the accurate computation of tan(r) and cot(r), |r| <= pi/4.
+//
+// Case small_r: |r| < 2^(-2)
+// --------------------------
+//
+// Since Arg = N pi/2 + r + c accurately, we have
+//
+// tan(Arg) = tan(r+c) for N even,
+// = -cot(r+c) otherwise.
+//
+// Here for this case, both tan(r) and -cot(r) can be approximated
+// by simple polynomials:
+//
+// tan(r) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
+// -cot(r) = -1/r + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+//
+// accurately. Since |r| is relatively small, tan(r+c) and
+// -cot(r+c) can be accurately approximated by replacing r with
+// r+c only in the first two terms of the corresponding polynomials.
+//
+// Note that P1_1 (and Q1_1 for that matter) approximates 1/3 to
+// almost 64 sig. bits, thus
+//
+// P1_1 (r+c)^3 = P1_1 r^3 + c * r^2 accurately.
+//
+// Hence,
+//
+// tan(r+c) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
+// + c*(1 + r^2)
+//
+// -cot(r+c) = -1/(r+c) + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+// + Q1_1*c
+//
+//
+// Case normal_r: 2^(-2) <= |r| <= pi/4
+// ------------------------------------
+//
+// This case is more likely than the previous one if one considers
+// r to be uniformly distributed in [-pi/4 pi/4].
+//
+// The required calculation is either
+//
+// tan(r + c) = tan(r) + correction, or
+// -cot(r + c) = -cot(r) + correction.
+//
+// Specifically,
+//
+// tan(r + c) = tan(r) + c tan'(r) + O(c^2)
+// = tan(r) + c sec^2(r) + O(c^2)
+// = tan(r) + c SEC_sq ...accurately
+// as long as SEC_sq approximates sec^2(r)
+// to, say, 5 bits or so.
+//
+// Similarly,
+//
+// -cot(r + c) = -cot(r) - c cot'(r) + O(c^2)
+// = -cot(r) + c csc^2(r) + O(c^2)
+// = -cot(r) + c CSC_sq ...accurately
+// as long as CSC_sq approximates csc^2(r)
+// to, say, 5 bits or so.
+//
+// We therefore concentrate on accurately calculating tan(r) and
+// cot(r) for a working-precision number r, |r| <= pi/4 to within
+// 0.1% or so.
+//
+// We will employ a table-driven approach. Let
+//
+// r = sgn_r * 2^k * 1.b_1 b_2 ... b_5 ... b_63
+// = sgn_r * ( B + x )
+//
+// where
+//
+// B = 2^k * 1.b_1 b_2 ... b_5 1
+// x = |r| - B
+//
+// Now,
+//                       tan(B) + tan(x)
+//    tan( B + x ) = ------------------------
+//                      1 - tan(B)*tan(x)
+//
+//                            /                                   \
+//                            |     tan(B) + tan(x)               |

+//                 = tan(B) + | ------------------------ - tan(B) |
+//                            |    1 - tan(B)*tan(x)              |
+//                            \                                   /
+//
+//                               sec^2(B) * tan(x)
+//                 = tan(B) + ------------------------
+//                               1 - tan(B)*tan(x)
+//
+//                              (1/[sin(B)*cos(B)]) * tan(x)
+//                 = tan(B) + --------------------------------
+//                                    cot(B) - tan(x)
+//
+//
+// Clearly, the values of tan(B), cot(B) and 1/(sin(B)*cos(B)) are
+// calculated beforehand and stored in a table. Since
+//
+// |x| <= 2^k * 2^(-6) <= 2^(-7) (because k = -1, -2)
+//
+// a very short polynomial will be sufficient to approximate tan(x)
+// accurately. The details involved in computing the last expression
+// will be given in the next section on algorithm description.
+//
+//
+// Now, we turn to the case where cot( B + x ) is needed.
+//
+//
+//                      1 - tan(B)*tan(x)
+//    cot( B + x ) = ------------------------
+//                       tan(B) + tan(x)
+//
+//                            /                                   \
+//                            |    1 - tan(B)*tan(x)              |

+//                 = cot(B) + | ------------------------ - cot(B) |
+//                            |     tan(B) + tan(x)               |
+//                            \                                   /
+//
+//                             [tan(B) + cot(B)] * tan(x)
+//                 = cot(B) - ----------------------------
+//                                  tan(B) + tan(x)
+//
+//                              (1/[sin(B)*cos(B)]) * tan(x)
+//                 = cot(B) - --------------------------------
+//                                    tan(B) + tan(x)
+//
+//
+// Note that the values of tan(B), cot(B) and 1/(sin(B)*cos(B)) that
+// are needed are the same set of values needed in the previous
+// case.
+//
+// Finally, we can put all the ingredients together as follows:
+//
+// Arg = N * pi/2 + r + c ...accurately
+//
+// tan(Arg) = tan(r) + correction if N is even;
+// = -cot(r) + correction otherwise.
+//
+// For Cases 2 and 4,
+//
+// Case 2:
+// tan(Arg) = tan(r + c) = r + c + r^3/3 N even
+// = -cot(r + c) = -1/(r+c) + r/3 N odd
+// Case 4:
+// tan(Arg) = tan(r + c) = r + c + r^3/3 + 2r^5/15 N even
+// = -cot(r + c) = -1/(r+c) + r/3 + r^3/45 N odd
+//
+//
+// For Cases 1 and 3,
+//
+// Case small_r: |r| < 2^(-2)
+//
+// tan(Arg) = r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
+// + c*(1 + r^2) N even
+//
+// = -1/(r+c) + Q1_1 r + Q1_2 r^3 + ... + Q1_7 r^13
+// + Q1_1*c N odd
+//
+// Case normal_r: 2^(-2) <= |r| <= pi/4
+//
+// tan(Arg) = tan(r) + c * sec^2(r) N even
+// = -cot(r) + c * csc^2(r) otherwise
+//
+// For N even,
+//
+// tan(Arg) = tan(r) + c*sec^2(r)
+// = tan( sgn_r * (B+x) ) + c * sec^2(|r|)
+// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(|r|) )
+// = sgn_r * ( tan(B+x) + sgn_r*c*sec^2(B) )
+//
+// since B approximates |r| to 2^(-6) in relative accuracy.
+//
+//                    /            (1/[sin(B)*cos(B)]) * tan(x)
+// tan(Arg) = sgn_r * | tan(B) + --------------------------------
+//                    \                  cot(B) - tan(x)
+//                                                                        \
+//                                                                + CORR  |

+//                                                                        /
+// where
+//
+// CORR = sgn_r*c*tan(B)*SC_inv(B); SC_inv(B) = 1/(sin(B)*cos(B)).
+//
+// For N odd,
+//
+// tan(Arg) = -cot(r) + c*csc^2(r)
+// = -cot( sgn_r * (B+x) ) + c * csc^2(|r|)
+// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(|r|) )
+// = sgn_r * ( -cot(B+x) + sgn_r*c*csc^2(B) )
+//
+// since B approximates |r| to 2^(-6) in relative accuracy.
+//
+//                    /             (1/[sin(B)*cos(B)]) * tan(x)
+// tan(Arg) = sgn_r * | -cot(B) + --------------------------------
+//                    \                   tan(B) + tan(x)
+//                                                                         \
+//                                                                 + CORR  |

+//                                                                         /
+// where
+//
+// CORR = sgn_r*c*cot(B)*SC_inv(B); SC_inv(B) = 1/(sin(B)*cos(B)).
+//
+//
+// The actual algorithm prescribes how all the mathematical formulas
+// are calculated.
+//
+//
+// 2. Algorithmic Description
+// ==========================
+//
+// 2.1 Computation for Cases 2 and 4.
+// ----------------------------------
+//
+// For Case 2, we use two-term polynomials.
+//
+// For N even,
+//
+// rsq := r * r
+// Result := c + r * rsq * P1_1
+// Result := r + Result ...in user-defined rounding
+//
+// For N odd,
+// S_hi := -frcpa(r) ...8 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...16 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...32 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...64 bits
+// S_lo := S_hi*( (1 + S_hi*r) + S_hi*c )
+// ...S_hi + S_lo is -1/(r+c) to extra precision
+// S_lo := S_lo + Q1_1*r
+//
+// Result := S_hi + S_lo ...in user-defined rounding
+//
+// For Case 4, we use three-term polynomials
+//
+// For N even,
+//
+// rsq := r * r
+// Result := c + r * rsq * (P1_1 + rsq * P1_2)
+// Result := r + Result ...in user-defined rounding
+//
+// For N odd,
+// S_hi := -frcpa(r) ...8 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...16 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...32 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...64 bits
+// S_lo := S_hi*( (1 + S_hi*r) + S_hi*c )
+// ...S_hi + S_lo is -1/(r+c) to extra precision
+// rsq := r * r
+// P := Q1_1 + rsq*Q1_2
+// S_lo := S_lo + r*P
+//
+// Result := S_hi + S_lo ...in user-defined rounding
+//
+//
+// Note that the coefficients P1_1, P1_2, Q1_1, and Q1_2 are
+// the same as those used in the small_r case of Cases 1 and 3
+// below.
+//
+//
+// 2.2 Computation for Cases 1 and 3.
+// ----------------------------------
+// This is further divided into the case of small_r,
+// where |r| < 2^(-2), and the case of normal_r, where |r| lies between
+// 2^(-2) and pi/4.
+//
+// Algorithm for the case of small_r
+// ---------------------------------
+//
+// For N even,
+// rsq := r * r
+// Poly1 := rsq*(P1_1 + rsq*(P1_2 + rsq*P1_3))
+// r_to_the_8 := rsq * rsq
+// r_to_the_8 := r_to_the_8 * r_to_the_8
+// Poly2 := P1_4 + rsq*(P1_5 + rsq*(P1_6 + ... rsq*P1_9))
+// CORR := c * ( 1 + rsq )
+// Poly := Poly1 + r_to_the_8*Poly2
+// Result := r*Poly + CORR
+// Result := r + Result ...in user-defined rounding
+// ...note that Poly1 and r_to_the_8 can be computed in parallel
+// ...with Poly2 (Poly1 is intentionally set to be much
+// ...shorter than Poly2 so that r_to_the_8 and CORR can be hidden)
+//
+// For N odd,
+// S_hi := -frcpa(r) ...8 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...16 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...32 bits
+// S_hi := S_hi + S_hi*(1 + S_hi*r) ...64 bits
+// S_lo := S_hi*( (1 + S_hi*r) + S_hi*c )
+// ...S_hi + S_lo is -1/(r+c) to extra precision
+// S_lo := S_lo + Q1_1*c
+//
+// ...S_hi and S_lo are computed in parallel with
+// ...the following
+// rsq := r*r
+// P := Q1_1 + rsq*(Q1_2 + rsq*(Q1_3 + ... + rsq*Q1_7))
+//
+// Result := r*P + S_lo
+// Result := S_hi + Result ...in user-defined rounding
+//
+//
+// Algorithm for the case of normal_r
+// ----------------------------------
+//
+// Here, we first consider the computation of tan( r + c ). As
+// presented in the previous section,
+//
+// tan( r + c ) = tan(r) + c * sec^2(r)
+// = sgn_r * [ tan(B+x) + CORR ]
+// CORR = sgn_r * c * tan(B) * 1/[sin(B)*cos(B)]
+//
+// because sec^2(r) = sec^2(|r|), and B approximates |r| to 6.5 bits.
+//
+// tan( r + c ) =
+//           /            (1/[sin(B)*cos(B)]) * tan(x)
+//   sgn_r * | tan(B) + -------------------------------- +
+//           \                  cot(B) - tan(x)
+//                                                                  \
+//                                                          CORR    |

+//                                                                  /
+//
+// The values of tan(B), cot(B) and 1/(sin(B)*cos(B)) are
+// calculated beforehand and stored in a table. Specifically,
+// the table values are
+//
+// tan(B) as T_hi + T_lo;
+// cot(B) as C_hi + C_lo;
+// 1/[sin(B)*cos(B)] as SC_inv
+//
+// T_hi, C_hi are in double-precision memory format;
+// T_lo, C_lo are in single-precision memory format;
+// SC_inv is in extended-precision memory format.
+//
+// The value of tan(x) will be approximated by a short polynomial of
+// the form
+//
+// tan(x) as x + x * P, where
+// P = x^2 * (P2_1 + x^2 * (P2_2 + x^2 * P2_3))
+//
+// Because |x| <= 2^(-7), cot(B) - x approximates cot(B) - tan(x)
+// to a relative accuracy better than 2^(-20). Thus, a good
+// initial guess of 1/( cot(B) - tan(x) ) to initiate the iterative
+// division is:
+//
+// 1/(cot(B) - tan(x)) is approximately
+// 1/(cot(B) - x) is
+// tan(B)/(1 - x*tan(B)) is approximately
+// T_hi / ( 1 - T_hi * x ) is approximately
+//
+// T_hi * [ 1 + (T_hi * x) + (T_hi * x)^2 ]
+//
+// The calculation of tan(r+c) therefore proceed as follows:
+//
+// Tx := T_hi * x
+// xsq := x * x
+//
+// V_hi := T_hi*(1 + Tx*(1 + Tx))
+// P := xsq * (P2_1 + xsq*(P2_2 + xsq*P2_3))
+// ...V_hi serves as an initial guess of 1/(cot(B) - tan(x))
+// ...good to about 20 bits of accuracy
+//
+// tanx := x + x*P
+// D := C_hi - tanx
+// ...D is a double precision denominator: cot(B) - tan(x)
+//
+// V_hi := V_hi + V_hi*(1 - V_hi*D)
+// ....V_hi approximates 1/(cot(B)-tan(x)) to 40 bits
+//
+// V_lo := V_hi * ( [ (1 - V_hi*C_hi) + V_hi*tanx ]
+// - V_hi*C_lo ) ...observe all order
+// ...V_hi + V_lo approximates 1/(cot(B) - tan(x))
+// ...to extra accuracy
+//
+// ... SC_inv(B) * (x + x*P)
+// ... tan(B) + ------------------------- + CORR
+// ... cot(B) - (x + x*P)
+// ...
+// ... = tan(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+//
+// Sx := SC_inv * x
+// CORR := sgn_r * c * SC_inv * T_hi
+//
+// ...put the ingredients together to compute
+// ... SC_inv(B) * (x + x*P)
+// ... tan(B) + ------------------------- + CORR
+// ... cot(B) - (x + x*P)
+// ...
+// ... = tan(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+// ... = T_hi + T_lo + CORR +
+// ... Sx * V_hi + Sx * V_lo + Sx * P *(V_hi + V_lo)
+//
+// CORR := CORR + T_lo
+// tail := V_lo + P*(V_hi + V_lo)
+// tail := Sx * tail + CORR
+// tail := Sx * V_hi + tail
+// T_hi := sgn_r * T_hi
+//
+// ...T_hi + sgn_r*tail now approximate
+// ...sgn_r*(tan(B+x) + CORR) accurately
+//
+// Result := T_hi + sgn_r*tail ...in user-defined
+// ...rounding control
+// ...It is crucial that independent paths be fully
+// ...exploited for performance's sake.
+//
+//
+// Next, we consider the computation of -cot( r + c ). As
+// presented in the previous section,
+//
+// -cot( r + c ) = -cot(r) + c * csc^2(r)
+// = sgn_r * [ -cot(B+x) + CORR ]
+// CORR = sgn_r * c * cot(B) * 1/[sin(B)*cos(B)]
+//
+// because csc^2(r) = csc^2(|r|), and B approximates |r| to 6.5 bits.
+//
+// -cot( r + c ) =
+//           /             (1/[sin(B)*cos(B)]) * tan(x)
+//   sgn_r * | -cot(B) + -------------------------------- +
+//           \                   tan(B) + tan(x)
+//                                                                   \
+//                                                           CORR    |

+//                                                                   /
+//
+// The values of tan(B), cot(B) and 1/(sin(B)*cos(B)) are
+// calculated beforehand and stored in a table. Specifically,
+// the table values are
+//
+// tan(B) as T_hi + T_lo;
+// cot(B) as C_hi + C_lo;
+// 1/[sin(B)*cos(B)] as SC_inv
+//
+// T_hi, C_hi are in double-precision memory format;
+// T_lo, C_lo are in single-precision memory format;
+// SC_inv is in extended-precision memory format.
+//
+// The value of tan(x) will be approximated by a short polynomial of
+// the form
+//
+// tan(x) as x + x * P, where
+// P = x^2 * (P2_1 + x^2 * (P2_2 + x^2 * P2_3))
+//
+// Because |x| <= 2^(-7), tan(B) + x approximates tan(B) + tan(x)
+// to a relative accuracy better than 2^(-18). Thus, a good
+// initial guess of 1/( tan(B) + tan(x) ) to initiate the iterative
+// division is:
+//
+// 1/(tan(B) + tan(x)) is approximately
+// 1/(tan(B) + x) is
+// cot(B)/(1 + x*cot(B)) is approximately
+// C_hi / ( 1 + C_hi * x ) is approximately
+//
+// C_hi * [ 1 - (C_hi * x) + (C_hi * x)^2 ]
+//
+// The calculation of -cot(r+c) therefore proceed as follows:
+//
+// Cx := C_hi * x
+// xsq := x * x
+//
+// V_hi := C_hi*(1 - Cx*(1 - Cx))
+// P := xsq * (P2_1 + xsq*(P2_2 + xsq*P2_3))
+// ...V_hi serves as an initial guess of 1/(tan(B) + tan(x))
+// ...good to about 18 bits of accuracy
+//
+// tanx := x + x*P
+// D := T_hi + tanx
+// ...D is a double precision denominator: tan(B) + tan(x)
+//
+// V_hi := V_hi + V_hi*(1 - V_hi*D)
+// ....V_hi approximates 1/(tan(B)+tan(x)) to 40 bits
+//
+// V_lo := V_hi * ( [ (1 - V_hi*T_hi) - V_hi*tanx ]
+// - V_hi*T_lo ) ...observe all order
+// ...V_hi + V_lo approximates 1/(tan(B) + tan(x))
+// ...to extra accuracy
+//
+// ...  -cot(B) + SC_inv(B)*(x + x*P) / ( tan(B) + (x + x*P) ) + CORR
+// ...
+// ... =-cot(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+//
+// Sx := SC_inv * x
+// CORR := sgn_r * c * SC_inv * C_hi
+//
+// ...put the ingredients together to compute
+// ...  -cot(B) + SC_inv(B)*(x + x*P) / ( tan(B) + (x + x*P) ) + CORR
+// ...
+// ... =-cot(B) + SC_inv(B)*(x + x*P)*(V_hi + V_lo) + CORR
+// ...
+// ... =-C_hi - C_lo + CORR +
+// ... Sx * V_hi + Sx * V_lo + Sx * P *(V_hi + V_lo)
+//
+// CORR := CORR - C_lo
+// tail := V_lo + P*(V_hi + V_lo)
+// tail := Sx * tail + CORR
+// tail := Sx * V_hi + tail
+// C_hi := -sgn_r * C_hi
+//
+// ...C_hi + sgn_r*tail now approximates
+// ...sgn_r*(-cot(B+x) + CORR) accurately
+//
+// Result := C_hi + sgn_r*tail in user-defined rounding control
+// ...It is crucial that independent paths be fully
+// ...exploited for performance's sake.
+//
+// 3. Implementation Notes
+// =======================
+//
+// Table entries T_hi, T_lo; C_hi, C_lo; SC_inv
+//
+// Recall that 2^(-2) <= |r| <= pi/4;
+//
+// r = sgn_r * 2^k * 1.b_1 b_2 ... b_63
+//
+// and
+//
+// B = 2^k * 1.b_1 b_2 b_3 b_4 b_5 1
+//
+// Thus, for k = -2, possible values of B are
+//
+// B = 2^(-2) * ( 1 + index/32 + 1/64 ),
+// index ranges from 0 to 31
+//
+// For k = -1, however, since |r| <= pi/4 = 0.78...
+// possible values of B are
+//
+// B = 2^(-1) * ( 1 + index/32 + 1/64 )
+// index ranges from 0 to 19.
+//
+//
+
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 128
+
+TANL_BASE_CONSTANTS: // constant pool for tanl; every data4 row is 16 bytes, offsets below are bytes from this label
+ASM_TYPE_DIRECTIVE(TANL_BASE_CONSTANTS,@object)
+data4 0x4B800000, 0xCB800000, 0x38800000, 0xB8800000 // offset 0: two**24, -two**24 (read with ldfs)
+ // offset 8: two**-14, -two**-14 (read with ldfs)
+data4 0x4E44152A, 0xA2F9836E, 0x00003FFE, 0x00000000 // offset 16: two_by_pi
+data4 0xCE81B9F1, 0xC84D32B0, 0x00004016, 0x00000000 // offset 32: P_0
+data4 0x2168C235, 0xC90FDAA2, 0x00003FFF, 0x00000000 // offset 48: P_1
+data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD, 0x00000000 // offset 64: P_2
+data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C, 0x00000000 // offset 80: P_3
+data4 0x5F000000, 0xDF000000, 0x00000000, 0x00000000 // offset 96: two_to_63, -two_to_63 (table_ptr2 starts here)
+data4 0x6EC6B45A, 0xA397E504, 0x00003FE7, 0x00000000 // offset 112: Inv_P_0
+data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF, 0x00000000 // offset 128: d_1
+data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C, 0x00000000 // offset 144: d_2
+data4 0x2168C234, 0xC90FDAA2, 0x00003FFE, 0x00000000 // offset 160: PI_BY_4
+data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE, 0x00000000 // offset 176: MPI_BY_4
+data4 0x3E800000, 0xBE800000, 0x00000000, 0x00000000 // offset 192: two**-2, -two**-2
+data4 0x2F000000, 0xAF000000, 0x00000000, 0x00000000 // offset 208: two**-33, -two**-33
+data4 0xAAAAAABD, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // offset 224: P1_1 (code adds 224 to the table base to reach it)
+data4 0x88882E6A, 0x88888888, 0x00003FFC, 0x00000000 // offset 240: P1_2
+data4 0x0F0177B6, 0xDD0DD0DD, 0x00003FFA, 0x00000000 // offset 256: P1_3
+data4 0x646B8C6D, 0xB327A440, 0x00003FF9, 0x00000000 // offset 272: P1_4
+data4 0x1D5F7D20, 0x91371B25, 0x00003FF8, 0x00000000 // offset 288: P1_5
+data4 0x61C67914, 0xEB69A5F1, 0x00003FF6, 0x00000000 // offset 304: P1_6
+data4 0x019318D2, 0xBEDD37BE, 0x00003FF5, 0x00000000 // offset 320: P1_7
+data4 0x3C794015, 0x9979B146, 0x00003FF4, 0x00000000 // offset 336: P1_8
+data4 0x8C6EB58A, 0x8EBD21A3, 0x00003FF3, 0x00000000 // offset 352: P1_9
+data4 0xAAAAAAB4, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // offset 368: Q1_1 (reached from P1_1 by a 144-byte post-increment)
+data4 0x0B5FC93E, 0xB60B60B6, 0x00003FF9, 0x00000000 // offset 384: Q1_2
+data4 0x0C9BBFBF, 0x8AB355E0, 0x00003FF6, 0x00000000 // offset 400: Q1_3
+data4 0xCBEE3D4C, 0xDDEBBC89, 0x00003FF2, 0x00000000 // offset 416: Q1_4
+data4 0x5F80BBB6, 0xB3548A68, 0x00003FEF, 0x00000000 // offset 432: Q1_5
+data4 0x4CED5BF1, 0x91362560, 0x00003FEC, 0x00000000 // offset 448: Q1_6
+data4 0x8EE92A83, 0xF189D95A, 0x00003FE8, 0x00000000 // offset 464: Q1_7
+data4 0xAAAB362F, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // offset 480: P2_1
+data4 0xE97A6097, 0x88888886, 0x00003FFC, 0x00000000 // offset 496: P2_2
+data4 0x25E716A1, 0xDD108EE0, 0x00003FFA, 0x00000000 // offset 512: P2_3
+// tan(B) table for B = 2^(-2)*(...): 32 rows of 16 bytes, byte offsets 528..1039.
+// Entries T_hi double-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Entries T_lo single-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Row layout: two words of T_hi (low word first), one word of T_lo, one zero pad word.
+data4 0x62400794, 0x3FD09BC3, 0x23A05C32, 0x00000000
+data4 0xDFFBC074, 0x3FD124A9, 0x240078B2, 0x00000000
+data4 0x5BD4920F, 0x3FD1AE23, 0x23826B8E, 0x00000000
+data4 0x15E2701D, 0x3FD23835, 0x22D31154, 0x00000000
+data4 0x63739C2D, 0x3FD2C2E4, 0x2265C9E2, 0x00000000
+data4 0xAFEEA48B, 0x3FD34E36, 0x245C05EB, 0x00000000
+data4 0x7DBB35D1, 0x3FD3DA31, 0x24749F2D, 0x00000000
+data4 0x67321619, 0x3FD466DA, 0x2462CECE, 0x00000000
+data4 0x1F94A4D5, 0x3FD4F437, 0x246D0DF1, 0x00000000
+data4 0x740C3E6D, 0x3FD5824D, 0x240A85B5, 0x00000000
+data4 0x4CB1E73D, 0x3FD61123, 0x23F96E33, 0x00000000
+data4 0xAD9EA64B, 0x3FD6A0BE, 0x247C5393, 0x00000000
+data4 0xB804FD01, 0x3FD73125, 0x241F3B29, 0x00000000
+data4 0xAB53EE83, 0x3FD7C25E, 0x2479989B, 0x00000000
+data4 0xE6640EED, 0x3FD8546F, 0x23B343BC, 0x00000000
+data4 0xE8AF1892, 0x3FD8E75F, 0x241454D1, 0x00000000
+data4 0x53928BDA, 0x3FD97B35, 0x238613D9, 0x00000000
+data4 0xEB9DE4DE, 0x3FDA0FF6, 0x22859FA7, 0x00000000
+data4 0x99ECF92D, 0x3FDAA5AB, 0x237A6D06, 0x00000000
+data4 0x6D8F1796, 0x3FDB3C5A, 0x23952F6C, 0x00000000
+data4 0x9CFB8BE4, 0x3FDBD40A, 0x2280FC95, 0x00000000
+data4 0x87943100, 0x3FDC6CC3, 0x245D2EC0, 0x00000000
+data4 0xB736C500, 0x3FDD068C, 0x23C4AD7D, 0x00000000
+data4 0xE1DDBC31, 0x3FDDA16D, 0x23D076E6, 0x00000000
+data4 0xEB515A93, 0x3FDE3D6E, 0x244809A6, 0x00000000
+data4 0xE6E9E5F1, 0x3FDEDA97, 0x220856C8, 0x00000000
+data4 0x1963CE69, 0x3FDF78F1, 0x244BE993, 0x00000000
+data4 0x7D635BCE, 0x3FE00C41, 0x23D21799, 0x00000000
+data4 0x1C302CD3, 0x3FE05CAB, 0x248A1B1D, 0x00000000
+data4 0xDB6A1FA0, 0x3FE0ADB9, 0x23D53E33, 0x00000000
+data4 0x4A20BA81, 0x3FE0FF72, 0x24DB9ED5, 0x00000000
+data4 0x153FA6F5, 0x3FE151D9, 0x24E9E451, 0x00000000
+// tan(B) table for B = 2^(-1)*(...): 20 rows of 16 bytes, byte offsets 1040..1359.
+// Entries T_hi double-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Entries T_lo single-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Row layout: two words of T_hi (low word first), one word of T_lo, one zero pad word.
+data4 0xBA1BE39E, 0x3FE1CEC4, 0x24B60F9E, 0x00000000
+data4 0x5ABD9B2D, 0x3FE277E4, 0x248C2474, 0x00000000
+data4 0x0272B110, 0x3FE32418, 0x247B8311, 0x00000000
+data4 0x890E2DF0, 0x3FE3D38B, 0x24C55751, 0x00000000
+data4 0x46236871, 0x3FE4866D, 0x24E5BC34, 0x00000000
+data4 0x45E044B0, 0x3FE53CEE, 0x24001BA4, 0x00000000
+data4 0x82EC06E4, 0x3FE5F742, 0x24B973DC, 0x00000000
+data4 0x25DF43F9, 0x3FE6B5A1, 0x24895440, 0x00000000
+data4 0xCAFD348C, 0x3FE77844, 0x240021CA, 0x00000000
+data4 0xCEED6B92, 0x3FE83F6B, 0x24C45372, 0x00000000
+data4 0xA34F3665, 0x3FE90B58, 0x240DAD33, 0x00000000
+data4 0x2C1E56B4, 0x3FE9DC52, 0x24F846CE, 0x00000000
+data4 0x27041578, 0x3FEAB2A4, 0x2323FB6E, 0x00000000
+data4 0x9DD8C373, 0x3FEB8E9F, 0x24B3090B, 0x00000000
+data4 0x65C9AA7B, 0x3FEC709B, 0x2449F611, 0x00000000
+data4 0xACCF8435, 0x3FED58F4, 0x23616A7E, 0x00000000
+data4 0x97635082, 0x3FEE480F, 0x24C2FEAE, 0x00000000
+data4 0xF0ACC544, 0x3FEF3E57, 0x242CE964, 0x00000000
+data4 0xF7E06E4B, 0x3FF01E20, 0x2480D3EE, 0x00000000
+data4 0x8A798A69, 0x3FF0A125, 0x24DB8967, 0x00000000
+// cot(B) table for B = 2^(-2)*(...): 32 rows of 16 bytes, byte offsets 1360..1871.
+// Entries C_hi double-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Entries C_lo single-precision memory format
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Row layout: two words of C_hi (low word first), one word of C_lo, one zero pad word.
+data4 0xE63EFBD0, 0x400ED3E2, 0x259D94D4, 0x00000000
+data4 0xC515DAB5, 0x400DDDB4, 0x245F0537, 0x00000000
+data4 0xBE19A79F, 0x400CF57A, 0x25D4EA9F, 0x00000000
+data4 0xD15298ED, 0x400C1A06, 0x24AE40A0, 0x00000000
+data4 0x164B2708, 0x400B4A4C, 0x25A5AAB6, 0x00000000
+data4 0x5285B068, 0x400A855A, 0x25524F18, 0x00000000
+data4 0x3FFA549F, 0x4009CA5A, 0x24C999C0, 0x00000000
+data4 0x646AF623, 0x4009188A, 0x254FD801, 0x00000000
+data4 0x6084D0E7, 0x40086F3C, 0x2560F5FD, 0x00000000
+data4 0xA29A76EE, 0x4007CDD2, 0x255B9D19, 0x00000000
+data4 0x6C8ECA95, 0x400733BE, 0x25CB021B, 0x00000000
+data4 0x1F8DDC52, 0x4006A07E, 0x24AB4722, 0x00000000
+data4 0xC298AD58, 0x4006139B, 0x252764E2, 0x00000000
+data4 0xBAD7164B, 0x40058CAB, 0x24DAF5DB, 0x00000000
+data4 0xAE31A5D3, 0x40050B4B, 0x25EA20F4, 0x00000000
+data4 0x89F85A8A, 0x40048F21, 0x2583A3E8, 0x00000000
+data4 0xA862380D, 0x400417DA, 0x25DCC4CC, 0x00000000
+data4 0x1088FCFE, 0x4003A52B, 0x2430A492, 0x00000000
+data4 0xCD3527D5, 0x400336CC, 0x255F77CF, 0x00000000
+data4 0x5760766D, 0x4002CC7F, 0x25DA0BDA, 0x00000000
+data4 0x11CE02E3, 0x40026607, 0x256FF4A2, 0x00000000
+data4 0xD37BBE04, 0x4002032C, 0x25208AED, 0x00000000
+data4 0x7F050775, 0x4001A3BD, 0x24B72DD6, 0x00000000
+data4 0xA554848A, 0x40014789, 0x24AB4DAA, 0x00000000
+data4 0x323E81B7, 0x4000EE65, 0x2584C440, 0x00000000
+data4 0x21CF1293, 0x40009827, 0x25C9428D, 0x00000000
+data4 0x3D415EEB, 0x400044A9, 0x25DC8482, 0x00000000
+data4 0xBD72C577, 0x3FFFE78F, 0x257F5070, 0x00000000
+data4 0x75EFD28E, 0x3FFF4AC3, 0x23EBBF7A, 0x00000000
+data4 0x60B52DDE, 0x3FFEB2AF, 0x22EECA07, 0x00000000
+data4 0x35204180, 0x3FFE1F19, 0x24191079, 0x00000000
+data4 0x54F7E60A, 0x3FFD8FCA, 0x248D3058, 0x00000000
+// cot(B) table for B = 2^(-1)*(...): 20 rows of 16 bytes, byte offsets 1872..2191.
+// Entries C_hi double-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Entries C_lo single-precision memory format
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Row layout: two words of C_hi (low word first), one word of C_lo, one zero pad word.
+data4 0x79F6FADE, 0x3FFCC06A, 0x239C7886, 0x00000000
+data4 0x891662A6, 0x3FFBB91F, 0x250BD191, 0x00000000
+data4 0x529F155D, 0x3FFABFB6, 0x256CC3E6, 0x00000000
+data4 0x2E964AE9, 0x3FF9D300, 0x250843E3, 0x00000000
+data4 0x89DCB383, 0x3FF8F1EF, 0x2277C87E, 0x00000000
+data4 0x7C87DBD6, 0x3FF81B93, 0x256DA6CF, 0x00000000
+data4 0x1042EDE4, 0x3FF74F14, 0x2573D28A, 0x00000000
+data4 0x1784B360, 0x3FF68BAF, 0x242E489A, 0x00000000
+data4 0x7C923C4C, 0x3FF5D0B5, 0x2532D940, 0x00000000
+data4 0xF418EF20, 0x3FF51D88, 0x253C7DD6, 0x00000000
+data4 0x02F88DAE, 0x3FF4719A, 0x23DB59BF, 0x00000000
+data4 0x49DA0788, 0x3FF3CC66, 0x252B4756, 0x00000000
+data4 0x0B980DB8, 0x3FF32D77, 0x23FE585F, 0x00000000
+data4 0xE56C987A, 0x3FF2945F, 0x25378A63, 0x00000000
+data4 0xB16523F6, 0x3FF200BD, 0x247BB2E0, 0x00000000
+data4 0x8CE27778, 0x3FF17235, 0x24446538, 0x00000000
+data4 0xFDEFE692, 0x3FF0E873, 0x2514638F, 0x00000000
+data4 0x33154062, 0x3FF0632C, 0x24A7FC27, 0x00000000
+data4 0xB3EF115F, 0x3FEFC42E, 0x248FD0FE, 0x00000000
+data4 0x135D26F6, 0x3FEEC9E8, 0x2385C719, 0x00000000
+// 1/(sin(B)*cos(B)) table for B = 2^(-2)*(...): 32 rows of 16 bytes, byte offsets 2192..2703.
+// Entries SC_inv in Swapped IEEE format (extended)
+// Index = 0,1,...,31 B = 2^(-2)*(1+Index/32+1/64)
+// Row layout: low significand word, high significand word, sign/exponent word, zero pad word.
+data4 0x1BF30C9E, 0x839D6D4A, 0x00004001, 0x00000000
+data4 0x554B0EB0, 0x80092804, 0x00004001, 0x00000000
+data4 0xA1CF0DE9, 0xF959F94C, 0x00004000, 0x00000000
+data4 0x77378677, 0xF3086BA0, 0x00004000, 0x00000000
+data4 0xCCD4723C, 0xED154515, 0x00004000, 0x00000000
+data4 0x1C27CF25, 0xE7790944, 0x00004000, 0x00000000
+data4 0x8DDACB88, 0xE22D037D, 0x00004000, 0x00000000
+data4 0x89C73522, 0xDD2B2D8A, 0x00004000, 0x00000000
+data4 0xBB2C1171, 0xD86E1A23, 0x00004000, 0x00000000
+data4 0xDFF5E0F9, 0xD3F0E288, 0x00004000, 0x00000000
+data4 0x283BEBD5, 0xCFAF16B1, 0x00004000, 0x00000000
+data4 0x0D88DD53, 0xCBA4AFAA, 0x00004000, 0x00000000
+data4 0xCA67C43D, 0xC7CE03CC, 0x00004000, 0x00000000
+data4 0x0CA0DDB0, 0xC427BC82, 0x00004000, 0x00000000
+data4 0xF13D8CAB, 0xC0AECD57, 0x00004000, 0x00000000
+data4 0x71ECE6B1, 0xBD606C38, 0x00004000, 0x00000000
+data4 0xA44C4929, 0xBA3A0A96, 0x00004000, 0x00000000
+data4 0xE5CCCEC1, 0xB7394F6F, 0x00004000, 0x00000000
+data4 0x9637D8BC, 0xB45C1203, 0x00004000, 0x00000000
+data4 0x92CB051B, 0xB1A05528, 0x00004000, 0x00000000
+data4 0x6BA2FFD0, 0xAF04432B, 0x00004000, 0x00000000
+data4 0x7221235F, 0xAC862A23, 0x00004000, 0x00000000
+data4 0x5F00A9D1, 0xAA2478AF, 0x00004000, 0x00000000
+data4 0x81E082BF, 0xA7DDBB0C, 0x00004000, 0x00000000
+data4 0x45684FEE, 0xA5B0987D, 0x00004000, 0x00000000
+data4 0x627A8F53, 0xA39BD0F5, 0x00004000, 0x00000000
+data4 0x6EC5C8B0, 0xA19E3B03, 0x00004000, 0x00000000
+data4 0x91CD7C66, 0x9FB6C1F0, 0x00004000, 0x00000000
+data4 0x1FA3DF8A, 0x9DE46410, 0x00004000, 0x00000000
+data4 0xA8F6B888, 0x9C263139, 0x00004000, 0x00000000
+data4 0xC27B0450, 0x9A7B4968, 0x00004000, 0x00000000
+data4 0x5EE614EE, 0x98E2DB7E, 0x00004000, 0x00000000
+// 1/(sin(B)*cos(B)) table for B = 2^(-1)*(...): 20 rows of 16 bytes, byte offsets 2704..3023.
+// Entries SC_inv in Swapped IEEE format (extended)
+// Index = 0,1,...,19 B = 2^(-1)*(1+Index/32+1/64)
+// Row layout: low significand word, high significand word, sign/exponent word, zero pad word.
+data4 0x13B2B5BA, 0x969F335C, 0x00004000, 0x00000000
+data4 0xD4C0F548, 0x93D446D9, 0x00004000, 0x00000000
+data4 0x61B798AF, 0x9147094F, 0x00004000, 0x00000000
+data4 0x758787AC, 0x8EF317CC, 0x00004000, 0x00000000
+data4 0xB99EEFDB, 0x8CD498B3, 0x00004000, 0x00000000
+data4 0xDFF8BC37, 0x8AE82A7D, 0x00004000, 0x00000000
+data4 0xE3C55D42, 0x892AD546, 0x00004000, 0x00000000
+data4 0xD15573C1, 0x8799FEA9, 0x00004000, 0x00000000
+data4 0x435A4B4C, 0x86335F88, 0x00004000, 0x00000000
+data4 0x3E93A87B, 0x84F4FB6E, 0x00004000, 0x00000000
+data4 0x80A382FB, 0x83DD1952, 0x00004000, 0x00000000
+data4 0xA4CB8C9E, 0x82EA3D7F, 0x00004000, 0x00000000
+data4 0x6861D0A8, 0x821B247C, 0x00004000, 0x00000000
+data4 0x63E8D244, 0x816EBED1, 0x00004000, 0x00000000
+data4 0x27E4CFC6, 0x80E42D91, 0x00004000, 0x00000000
+data4 0x28E64AFD, 0x807ABF8D, 0x00004000, 0x00000000
+data4 0x863B4FD8, 0x8031EF26, 0x00004000, 0x00000000
+data4 0xAE8C11FD, 0x800960AD, 0x00004000, 0x00000000
+data4 0x5FDBEC21, 0x8000E147, 0x00004000, 0x00000000
+data4 0xA07791FA, 0x80186650, 0x00004000, 0x00000000
+ASM_SIZE_DIRECTIVE(TANL_BASE_CONSTANTS)
+
+Arg = f8 // symbolic names for the registers used by tanl; Arg and Result are both f8
+Result = f8 // same register as Arg
+fp_tmp = f9 // scratch for the dummy fmpy.s0 that raises inexact
+U_2 = f10
+rsq = f11
+C_hi = f12
+C_lo = f13
+T_hi = f14
+T_lo = f15
+
+N_0 = f32
+d_1 = f33
+MPI_BY_4 = f34
+tail = f35
+tanx = f36
+Cx = f37
+Sx = f38
+sgn_r = f39
+CORR = f40
+P = f41
+D = f42
+ArgPrime = f43
+P_0 = f44
+
+P2_1 = f45 // P2_1..P2_3 use the same registers as P1_1..P1_3 below
+P2_2 = f46
+P2_3 = f47
+
+P1_1 = f45 // same register as P2_1
+P1_2 = f46 // same register as P2_2
+P1_3 = f47 // same register as P2_3
+
+P1_4 = f48
+P1_5 = f49
+P1_6 = f50
+P1_7 = f51
+P1_8 = f52
+P1_9 = f53
+
+TWO_TO_63 = f54
+NEGTWO_TO_63 = f55
+x = f56
+xsq = f57
+Tx = f58
+Tx1 = f59
+Set = f60
+poly1 = f61
+poly2 = f62
+Poly = f63
+Poly1 = f64
+Poly2 = f65
+r_to_the_8 = f66
+B = f67
+SC_inv = f68
+Pos_r = f69
+N_0_fix = f70
+PI_BY_4 = f71
+NEGTWO_TO_NEG2 = f72
+TWO_TO_24 = f73
+TWO_TO_NEG14 = f74
+TWO_TO_NEG33 = f75
+NEGTWO_TO_24 = f76 // f76 is also NEGTWO_TO_NEG14; this one is loaded at entry for the |Arg| >= 2**24 test
+NEGTWO_TO_NEG14 = f76 // same register as NEGTWO_TO_24; loaded only in TANL_LARGER_ARG, after that test is done
+NEGTWO_TO_NEG33 = f77
+two_by_PI = f78
+N = f79
+N_fix = f80
+P_1 = f81
+P_2 = f82
+P_3 = f83
+s_val = f84
+w = f85
+c = f86
+r = f87
+A = f89 // note: f88 is not given a name in this list
+a = f90
+t = f91
+U_1 = f92
+d_2 = f93
+TWO_TO_NEG2 = f94
+Q1_1 = f95
+Q1_2 = f96
+Q1_3 = f97
+Q1_4 = f98
+Q1_5 = f99
+Q1_6 = f100
+Q1_7 = f101
+Q1_8 = f102 // NOTE(review): TANL_BASE_CONSTANTS labels only Q1_1..Q1_7; confirm whether Q1_8 is ever loaded
+S_hi = f103
+S_lo = f104
+V_hi = f105
+V_lo = f106
+U_hi = f107
+U_lo = f108
+U_hiabs = f109
+V_hiabs = f110
+V = f111
+Inv_P_0 = f112
+
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_SAVE_PFS = r35
+delta1 = r36
+table_ptr1 = r37 // walks TANL_BASE_CONSTANTS from its base
+table_ptr2 = r38 // initialised to table_ptr1 + 96 at entry
+i_0 = r39
+i_1 = r40 // receives bit 0 of N_fix_gr (extr.u ..., 0, 1)
+N_fix_gr = r41
+N_inc = r42
+exp_Arg = r43
+exp_r = r44
+sig_r = r45
+lookup = r46
+table_offset = r47
+Create_B = r48
+gr_tmp = r49 // set to -1 at entry and moved into fp_tmp with setf.sig
+
+.section .text
+.global tanl
+.proc tanl
+tanl:
+#ifdef _LIBC
+.global __tanl
+.proc __tanl
+__tanl:
+#endif
+{ .mfi
+alloc r32 = ar.pfs, 0,17,2,0
+(p0) fclass.m.unc p6,p0 = Arg, 0x1E7
+ addl gr_tmp = -1,r0
+}
+{ .mfi
+ nop.m 0
+(p0) fclass.nm.unc p7,p0 = Arg, 0x1FF
+ nop.i 0
+};;
+
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
+ nop.f 999
+ nop.i 0
+}
+;;
+{ .mmi
+(p0) ld8 table_ptr1 = [table_ptr1]
+ setf.sig fp_tmp = gr_tmp // Make a constant so fmpy produces inexact
+ nop.i 999
+}
+;;
+
+//
+// Check for NatVals, Infs , NaNs, and Zeros
+// Check for everything - if false, then must be pseudo-zero
+// or pseudo-nan.
+// Local table pointer
+//
+{ .mbb
+(p0) add table_ptr2 = 96, table_ptr1
+(p6) br.cond.spnt L(TANL_SPECIAL)
+(p7) br.cond.spnt L(TANL_SPECIAL) ;;
+}
+//
+// Point to Inv_P_0
+// Branch out to deal with unsupporteds and special values.
+//
+{ .mmf
+(p0) ldfs TWO_TO_24 = [table_ptr1],4
+(p0) ldfs TWO_TO_63 = [table_ptr2],4
+//
+// Load -2**24, load -2**63.
+//
+(p0) fcmp.eq.s0 p0, p6 = Arg, f1 ;;
+}
+{ .mfi
+(p0) ldfs NEGTWO_TO_63 = [table_ptr2],12
+(p0) fnorm.s1 Arg = Arg
+ nop.i 999
+}
+//
+// Load 2**24, Load 2**63.
+//
+{ .mmi
+(p0) ldfs NEGTWO_TO_24 = [table_ptr1],12 ;;
+//
+// Do fcmp to generate Denormal exception
+// - can't do FNORM (will generate Underflow when U is unmasked!)
+// Normalize input argument.
+//
+(p0) ldfe two_by_PI = [table_ptr1],16
+ nop.i 999
+}
+{ .mmi
+(p0) ldfe Inv_P_0 = [table_ptr2],16 ;;
+(p0) ldfe d_1 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Decide about the paths to take:
+// PR_1 and PR_3 set if -2**24 < Arg < 2**24 - CASE 1 OR 2
+// OTHERWISE - CASE 3 OR 4
+// Load inverse of P_0 .
+// Set PR_6 if Arg <= -2**63
+// Are there any Infs, NaNs, or zeros?
+//
+{ .mmi
+(p0) ldfe P_0 = [table_ptr1],16 ;;
+(p0) ldfe d_2 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Set PR_8 if Arg <= -2**24
+// Set PR_6 if Arg >= 2**63
+//
+{ .mmi
+(p0) ldfe P_1 = [table_ptr1],16 ;;
+(p0) ldfe PI_BY_4 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Set PR_8 if Arg >= 2**24
+//
+{ .mmi
+(p0) ldfe P_2 = [table_ptr1],16 ;;
+(p0) ldfe MPI_BY_4 = [table_ptr2],16
+ nop.i 999
+}
+//
+// Load P_2 and PI_BY_4
+//
+{ .mfi
+(p0) ldfe P_3 = [table_ptr1],16
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.le.unc.s1 p6,p7 = Arg,NEGTWO_TO_63
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fcmp.le.unc.s1 p8,p9 = Arg,NEGTWO_TO_24
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fcmp.ge.s1 p6,p0 = Arg,TWO_TO_63
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fcmp.ge.s1 p8,p0 = Arg,TWO_TO_24
+ nop.i 999 ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Load P_3 and -PI_BY_4
+//
+(p6) br.cond.spnt L(TANL_ARG_TOO_LARGE) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+//
+// Load 2**(-2).
+// Load -2**(-2).
+// Branch out if we have a special argument.
+// Branch out if the magnitude of the input argument is too large
+// - do this branch before the next.
+//
+(p8) br.cond.spnt L(TANL_LARGER_ARG) ;;
+}
+//
+// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
+//
+{ .mfi
+(p0) ldfs TWO_TO_NEG2 = [table_ptr2],4
+// ARGUMENT REDUCTION CODE - CASE 1 and 2
+// Load 2**(-2).
+// Load -2**(-2).
+(p0) fmpy.s1 N = Arg,two_by_PI
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr2],12
+//
+// N = Arg * 2/pi
+//
+(p0) fcmp.lt.unc.s1 p8,p9= Arg,PI_BY_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// if Arg < pi/4, set PR_8.
+//
+(p8) fcmp.gt.s1 p8,p9= Arg,MPI_BY_4
+ nop.i 999 ;;
+}
+//
+// Case 1: Is |r| < 2**(-2).
+// Arg is the same as r in this case.
+// r = Arg
+// c = 0
+//
+{ .mfi
+(p8) mov N_fix_gr = r0
+//
+// if Arg > -pi/4, reset PR_8.
+// Select the case when |Arg| < pi/4 - set PR[8] = true.
+// Else Select the case when |Arg| >= pi/4 - set PR[9] = true.
+//
+(p0) fcvt.fx.s1 N_fix = N
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Grab the integer part of N .
+//
+(p8) mov r = Arg
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) mov c = f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p8) fcmp.lt.unc.s1 p10, p11 = Arg, TWO_TO_NEG2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fcmp.gt.s1 p10,p0 = Arg, NEGTWO_TO_NEG2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 2: Place integer part of N in GP register.
+//
+(p9) fcvt.xf N = N_fix
+ nop.i 999 ;;
+}
+{ .mib
+(p9) getf.sig N_fix_gr = N_fix
+ nop.i 999
+//
+// Case 2: Convert integer N_fix back to normalized floating-point value.
+//
+(p10) br.cond.spnt L(TANL_SMALL_R) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p8) br.cond.sptk L(TANL_NORMAL_R) ;;
+}
+//
+// Case 1: PR_3 is only affected when PR_1 is set.
+//
+{ .mmi
+(p9) ldfs TWO_TO_NEG33 = [table_ptr2], 4 ;;
+//
+// Case 2: Load 2**(-33).
+//
+(p9) ldfs NEGTWO_TO_NEG33 = [table_ptr2], 4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 2: Load -2**(-33).
+//
+(p9) fnma.s1 s_val = N, P_1, Arg
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fmpy.s1 w = N, P_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 2: w = N * P_2
+// Case 2: s_val = -N * P_1 + Arg
+//
+(p0) fcmp.lt.unc.s1 p9,p8 = s_val, TWO_TO_NEG33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Decide between case_1 and case_2 reduce:
+//
+(p9) fcmp.gt.s1 p9, p8 = s_val, NEGTWO_TO_NEG33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: s <= -2**(-33) or s >= 2**(-33)
+// Case 2_reduce: -2**(-33) < s < 2**(-33)
+//
+(p8) fsub.s1 r = s_val, w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fmpy.s1 w = N, P_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 U_1 = N, P_2, w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: Is |r| < 2**(-2), if so set PR_10
+// else set PR_11.
+//
+(p8) fsub.s1 c = s_val, r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: r = s + w (change sign)
+// Case 2_reduce: w = N * P_3 (change sign)
+//
+(p8) fcmp.lt.unc.s1 p10, p11 = r, TWO_TO_NEG2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fcmp.gt.s1 p10, p11 = r, NEGTWO_TO_NEG2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 r = s_val, U_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: c is complete here.
+// c = c + w (w has not been negated.)
+// Case 2_reduce: r is complete here - continue to calculate c .
+// r = s - U_1
+//
+(p9) fms.s1 U_2 = N, P_2, U_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 1_reduce: c = s - r
+// Case 2_reduce: U_1 = N * P_2 + w
+//
+(p8) fsub.s1 c = c, w
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p9) fsub.s1 s_val = s_val, r
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+//
+// Case 2_reduce:
+// U_2 = N * P_2 - U_1
+// Not needed until later.
+//
+(p9) fadd.s1 U_2 = U_2, w
+//
+// Case 2_reduce:
+// s = s - r
+// U_2 = U_2 + w
+//
+(p10) br.cond.spnt L(TANL_SMALL_R) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p11) br.cond.sptk L(TANL_NORMAL_R) ;;
+}
+{ .mii
+ nop.m 999
+//
+// Case 2_reduce:
+// c = c - U_2
+// c is complete here
+// Argument reduction ends here.
+//
+(p9) extr.u i_1 = N_fix_gr, 0, 1 ;;
+(p9) cmp.eq.unc p11, p12 = 0x0000,i_1 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Is i_1 even or odd?
+// if i_1 == 0, set p11, else set p12.
+//
+(p11) fmpy.s1 rsq = r, r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) frcpa.s1 S_hi,p0 = f1, r
+ nop.i 999
+}
+
+
+
+//
+// Case 1: Branch to SMALL_R or NORMAL_R.
+// Case 1 is done now.
+//
+
+{ .mfi
+(p9) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
+(p9) fsub.s1 c = s_val, U_1
+ nop.i 999 ;;
+}
+;;
+
+{ .mmi
+(p9) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p9) add table_ptr1 = 224, table_ptr1 ;;
+(p9) ldfe P1_1 = [table_ptr1],144
+ nop.i 999 ;;
+}
+//
+// Get [i_1] - lsb of N_fix_gr .
+// Load P1_1 and point to Q1_1 .
+//
+{ .mfi
+(p9) ldfe Q1_1 = [table_ptr1] , 0
+//
+// N even: rsq = r * Z
+// N odd: S_hi = frcpa(r)
+//
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Case 2_reduce:
+// c = s - U_1
+//
+(p9) fsub.s1 c = c, U_2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: Change sign of S_hi
+//
+(p11) fmpy.s1 rsq = rsq, P1_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = rsq * P1_1
+// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary
+//
+(p11) fma.s1 Result = r, rsq, c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = c + r * rsq
+// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = Result + r
+// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
+//
+(p11) fadd.s0 Result = r, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result1 = Result + r
+// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0 64 bits partial
+//
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * poly + 1.0 64 bits
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0
+//
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * c + poly1
+//
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = S_hi * poly1
+//
+(p12) fma.s1 S_lo = Q1_1, r, S_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: Result = S_hi + S_lo
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// N odd: S_lo = S_lo + Q1_1 * r
+//
+(p12) fadd.s0 Result = S_hi, S_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(TANL_LARGER_ARG):
+
+//
+// ARGUMENT REDUCTION CODE - CASE 3 and 4
+//
+
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
+(p0) fmpy.s1 N_0 = Arg, Inv_P_0
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p0) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Adjust table_ptr1 to beginning of table.
+// N_0 = Arg * Inv_P_0
+//
+{ .mmi
+(p0) add table_ptr1 = 8, table_ptr1 ;;
+//
+// Point to 2*-14
+//
+(p0) ldfs TWO_TO_NEG14 = [table_ptr1], 4
+ nop.i 999 ;;
+}
+//
+// Load 2**(-14).
+//
+{ .mmi
+(p0) ldfs NEGTWO_TO_NEG14 = [table_ptr1], 180 ;;
+//
+// N_0_fix = integer part of N_0 .
+// Adjust table_ptr1 to beginning of table.
+//
+(p0) ldfs TWO_TO_NEG2 = [table_ptr1], 4
+ nop.i 999 ;;
+}
+//
+// Make N_0 the integer part.
+//
+{ .mfi
+(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr1]
+//
+// Load -2**(-14).
+//
+(p0) fcvt.fx.s1 N_0_fix = N_0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fcvt.xf N_0 = N_0_fix
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 ArgPrime = N_0, P_0, Arg
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 w = N_0, d_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// ArgPrime = -N_0 * P_0 + Arg
+// w = N_0 * d_1
+//
+(p0) fmpy.s1 N = ArgPrime, two_by_PI
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N = ArgPrime * 2/pi
+//
+(p0) fcvt.fx.s1 N_fix = N
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N_fix is the integer part.
+//
+(p0) fcvt.xf N = N_fix
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) getf.sig N_fix_gr = N_fix
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N is the integer part of the reduced-reduced argument.
+// Put the integer in a GP register.
+//
+(p0) fnma.s1 s_val = N, P_1, ArgPrime
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fnma.s1 w = N, P_2, w
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// s_val = -N*P_1 + ArgPrime
+// w = -N*P_2 + w
+//
+(p0) fcmp.lt.unc.s1 p11, p10 = s_val, TWO_TO_NEG14
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 3: r = s_val + w (Z complete)
+// Case 4: U_hi = N_0 * d_1
+//
+(p10) fmpy.s1 V_hi = N, P_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 U_hi = N_0, d_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 3: r = s_val + w (Z complete)
+// Case 4: U_hi = N_0 * d_1
+//
+(p11) fmpy.s1 V_hi = N, P_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 U_hi = N_0, d_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Decide between case 3 and 4:
+// Case 3: s <= -2**(-14) or s >= 2**(-14)
+// Case 4: -2**(-14) < s < 2**(-14)
+//
+(p10) fadd.s1 r = s_val, w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 w = N, P_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: We need abs of both U_hi and V_hi - dont
+// worry about switched sign of V_hi .
+//
+(p11) fsub.s1 A = U_hi, V_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: A = U_hi + V_hi
+// Note: Worry about switched sign of V_hi, so subtract instead of add.
+//
+(p11) fnma.s1 V_lo = N, P_2, V_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fms.s1 U_lo = N_0, d_1, U_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fabs V_hiabs = V_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: V_hi = N * P_2
+// w = N * P_3
+// Note the product does not include the (-) as in the writeup
+// so (-) missing for V_hi and w .
+(p10) fadd.s1 r = s_val, w
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 3: c = s_val - r
+// Case 4: U_lo = N_0 * d_1 - U_hi
+//
+(p11) fabs U_hiabs = U_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 w = N, P_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: Set P_12 if U_hiabs >= V_hiabs
+//
+(p11) fadd.s1 C_hi = s_val, A
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: C_hi = s_val + A
+//
+(p11) fadd.s1 t = U_lo, V_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 3: Is |r| < 2**(-2), if so set PR_7
+// else set PR_8.
+// Case 3: If PR_7 is set, prepare to branch to Small_R.
+// Case 3: If PR_8 is set, prepare to branch to Normal_R.
+//
+(p10) fsub.s1 c = s_val, r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 3: c = (s - r) + w (c complete)
+//
+(p11) fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fms.s1 w = N_0, d_2, w
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: V_hi = N * P_2
+// w = N * P_3
+// Note the product does not include the (-) as in the writeup
+// so (-) missing for V_hi and w .
+//
+(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p14) fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
+// Note: the (-) is still missing for V_hi .
+// Case 4: w = w + N_0 * d_2
+// Note: the (-) is now incorporated in w .
+//
+(p10) fadd.s1 c = c, w
+//
+// Case 4: t = U_lo + V_lo
+// Note: remember V_lo should be (-), subtract instead of add. NO
+//
+(p14) br.cond.spnt L(TANL_SMALL_R) ;;
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+(p15) br.cond.spnt L(TANL_NORMAL_R) ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 3: Vector off when |r| < 2**(-2). Recall that PR_3 will be true.
+// The remaining stuff is for Case 4.
+//
+(p12) fsub.s1 a = U_hi, A
+(p11) extr.u i_1 = N_fix_gr, 0, 1 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: C_lo = s_val - C_hi
+//
+(p11) fadd.s1 t = t, w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fadd.s1 a = V_hi, A
+ nop.i 999 ;;
+}
+
+
+
+//
+// Case 4: a = U_hi - A
+// a = V_hi - A (do an add to account for missing (-) on V_hi
+//
+
+{ .mfi
+(p11) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
+(p11) fsub.s1 C_lo = s_val, C_hi
+ nop.i 999
+}
+;;
+
+
+
+//
+// Case 4: a = (U_hi - A) + V_hi
+// a = (V_hi - A) + U_hi
+// In each case account for negative missing form V_hi .
+//
+
+
+{ .mmi
+(p11) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+//
+// Case 4: C_lo = (s_val - C_hi) + A
+//
+{ .mmi
+(p11) add table_ptr1 = 224, table_ptr1 ;;
+(p11) ldfe P1_1 = [table_ptr1], 16
+ nop.i 999 ;;
+}
+{ .mfi
+(p11) ldfe P1_2 = [table_ptr1], 128
+//
+// Case 4: w = U_lo + V_lo + w
+//
+(p12) fsub.s1 a = a, V_hi
+ nop.i 999 ;;
+}
+//
+// Case 4: r = C_hi + C_lo
+//
+{ .mfi
+(p11) ldfe Q1_1 = [table_ptr1], 16
+(p11) fadd.s1 C_lo = C_lo, A
+ nop.i 999 ;;
+}
+//
+// Case 4: c = C_hi - r
+// Get [i_1] - lsb of N_fix_gr.
+//
+{ .mfi
+(p11) ldfe Q1_2 = [table_ptr1], 16
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p13) fsub.s1 a = U_hi, a
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 t = t, a
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: t = t + a
+//
+(p11) fadd.s1 C_lo = C_lo, t
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: C_lo = C_lo + t
+//
+(p11) fadd.s1 r = C_hi, C_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fsub.s1 c = C_hi, r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// Case 4: c = c + C_lo finished.
+// Is i_1 even or odd?
+// if i_1 == 0, set PR_4, else set PR_5.
+//
+// r and c have been computed.
+// We known whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+(p0) fmpy.s1 rsq = r, r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fadd.s1 c = c , C_lo
+(p11) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) frcpa.s1 S_hi, p0 = f1, r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: Change sign of S_hi
+//
+(p11) fma.s1 Result = rsq, P1_2, P1_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 P = rsq, Q1_2, Q1_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: Result = S_hi + S_lo (User supplied rounding mode for C1)
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = r * r
+// N odd: S_hi = frcpa(r)
+//
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N even: rsq = rsq * P1_2 + P1_1
+// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary
+//
+(p11) fmpy.s1 Result = rsq, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r,f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = Result * rsq
+// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary
+//
+(p11) fma.s1 Result = r, Result, c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
+//
+(p11) fadd.s0 Result= r, Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = Result * r + c
+// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
+//
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result1 = Result + r (Rounding mode S0)
+// N odd: poly1 = S_hi * r + 1.0 64 bits partial
+//
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * poly + S_hi 64 bits
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0
+//
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * c + poly1
+//
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = S_hi * poly1
+//
+(p12) fma.s1 S_lo = P, r, S_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// N odd: S_lo = S_lo + r * P
+//
+(p12) fadd.s0 Result = S_hi, S_lo
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(TANL_SMALL_R):
+{ .mii
+ nop.m 999
+(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;
+(p0) cmp.eq.unc p11, p12 = 0x0000, i_1
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 rsq = r, r
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
+(p12) frcpa.s1 S_hi, p0 = f1, r
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+(p0) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+// *****************************************************************
+// *****************************************************************
+// *****************************************************************
+
+
+{ .mmi
+(p0) add table_ptr1 = 224, table_ptr1 ;;
+(p0) ldfe P1_1 = [table_ptr1], 16
+ nop.i 999 ;;
+}
+// r and c have been computed.
+// We known whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+// |r| < 2**(-2)
+{ .mfi
+(p0) ldfe P1_2 = [table_ptr1], 16
+(p11) fmpy.s1 r_to_the_8 = rsq, rsq
+ nop.i 999 ;;
+}
+//
+// Set table_ptr1 to beginning of constant table.
+// Get [i_1] - lsb of N_fix_gr.
+//
+{ .mfi
+(p0) ldfe P1_3 = [table_ptr1], 96
+//
+// N even: rsq = r * r
+// N odd: S_hi = frcpa(r)
+//
+(p12) fmerge.ns S_hi = S_hi, S_hi
+ nop.i 999 ;;
+}
+//
+// Is i_1 even or odd?
+// if i_1 == 0, set PR_11.
+// if i_1 != 0, set PR_12.
+//
+{ .mfi
+(p11) ldfe P1_9 = [table_ptr1], -16
+//
+// N even: Poly2 = P1_7 + Poly2 * rsq
+// N odd: poly2 = Q1_5 + poly2 * rsq
+//
+(p11) fadd.s1 CORR = rsq, f1
+ nop.i 999 ;;
+}
+{ .mmi
+(p11) ldfe P1_8 = [table_ptr1], -16 ;;
+//
+// N even: Poly1 = P1_2 + P1_3 * rsq
+// N odd: poly1 = 1.0 + S_hi * r
+// 16 bits partial account for necessary (-1)
+//
+(p11) ldfe P1_7 = [table_ptr1], -16
+ nop.i 999 ;;
+}
+//
+// N even: Poly1 = P1_1 + Poly1 * rsq
+// N odd: S_hi = S_hi + S_hi * poly1) 16 bits account for necessary
+//
+{ .mfi
+(p11) ldfe P1_6 = [table_ptr1], -16
+//
+// N even: Poly2 = P1_5 + Poly2 * rsq
+// N odd: poly2 = Q1_3 + poly2 * rsq
+//
+(p11) fmpy.s1 r_to_the_8 = r_to_the_8, r_to_the_8
+ nop.i 999 ;;
+}
+//
+// N even: Poly1 = Poly1 * rsq
+// N odd: poly1 = 1.0 + S_hi * r 32 bits partial
+//
+{ .mfi
+(p11) ldfe P1_5 = [table_ptr1], -16
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+
+//
+// N even: CORR = CORR * c
+// N odd: S_hi = S_hi * poly1 + S_hi 32 bits
+//
+
+//
+// N even: Poly2 = P1_6 + Poly2 * rsq
+// N odd: poly2 = Q1_4 + poly2 * rsq
+//
+
+{ .mmf
+(p11) ldfe P1_4 = [table_ptr1], -16
+(p0) addl table_ptr2 = @ltoff(TANL_BASE_CONSTANTS), gp
+(p11) fmpy.s1 CORR = CORR, c
+}
+;;
+
+
+{ .mmi
+(p0) ld8 table_ptr2 = [table_ptr2]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mii
+(p0) add table_ptr2 = 464, table_ptr2
+ nop.i 999 ;;
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 Poly1 = P1_3, rsq, P1_2
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfe Q1_7 = [table_ptr2], -16
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999 ;;
+}
+{ .mfi
+(p0) ldfe Q1_6 = [table_ptr2], -16
+(p11) fma.s1 Poly2 = P1_9, rsq, P1_8
+ nop.i 999 ;;
+}
+{ .mmi
+(p0) ldfe Q1_5 = [table_ptr2], -16 ;;
+(p12) ldfe Q1_4 = [table_ptr2], -16
+ nop.i 999 ;;
+}
+{ .mfi
+(p12) ldfe Q1_3 = [table_ptr2], -16
+//
+// N even: Poly2 = P1_8 + P1_9 * rsq
+// N odd: poly2 = Q1_6 + Q1_7 * rsq
+//
+(p11) fma.s1 Poly1 = Poly1, rsq, P1_1
+ nop.i 999 ;;
+}
+{ .mfi
+(p12) ldfe Q1_2 = [table_ptr2], -16
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999 ;;
+}
+{ .mfi
+(p12) ldfe Q1_1 = [table_ptr2], -16
+(p11) fma.s1 Poly2 = Poly2, rsq, P1_7
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: CORR = rsq + 1
+// N even: r_to_the_8 = rsq * rsq
+//
+(p11) fmpy.s1 Poly1 = Poly1, rsq
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = Q1_7, rsq, Q1_6
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 Poly2 = Poly2, rsq, P1_6
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_5
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 Poly2= Poly2, rsq, P1_5
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 S_hi = S_hi, poly1, S_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: r_to_the_8 = r_to_the_8 * r_to_the_8
+// N odd: poly1 = S_hi * r + 1.0 64 bits partial
+//
+(p11) fma.s1 Poly2 = Poly2, rsq, P1_4
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = CORR + Poly * r
+// N odd: P = Q1_1 + poly2 * rsq
+//
+(p12) fma.s1 poly1 = S_hi, r, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Poly2 = P1_4 + Poly2 * rsq
+// N odd: poly2 = Q1_2 + poly2 * rsq
+//
+(p11) fma.s1 Poly = Poly2, r_to_the_8, Poly1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly1 = S_hi, c, poly1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 poly2 = poly2, rsq, Q1_2
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+//
+// N even: Poly = Poly1 + Poly2 * r_to_the_8
+// N odd: S_hi = S_hi * poly1 + S_hi 64 bits
+//
+(p11) fma.s1 Result = Poly, r, CORR
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = r + Result (User supplied rounding mode)
+// N odd: poly1 = S_hi * c + poly1
+//
+(p12) fmpy.s1 S_lo = S_hi, poly1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fma.s1 P = poly2, rsq, Q1_1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: poly1 = S_hi * r + 1.0
+//
+//
+// N odd: S_lo = S_hi * poly1
+//
+(p11) fadd.s0 Result = Result, r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: S_lo = Q1_1 * c + S_lo
+//
+(p12) fma.s1 S_lo = Q1_1, c, S_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: Result = S_lo + r * P
+//
+(p12) fma.s1 Result = P, r, S_lo
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+//
+// N odd: Result = Result + S_hi (user supplied rounding mode)
+//
+(p12) fadd.s0 Result = Result, S_hi
+(p0) br.ret.sptk b0 ;;
+}
+
+
+L(TANL_NORMAL_R):
+{ .mfi
+(p0) getf.sig sig_r = r
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+//
+// r and c have been computed.
+// Make sure ftz mode is set - should be automatic when using wre
+//
+//
+// Get [i_1] - lsb of N_fix_gr alone.
+//
+(p0) fmerge.s Pos_r = f1, r
+(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmerge.s sgn_r = r, f1
+(p0) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
+}
+{ .mfi
+ nop.m 999
+ nop.f 999
+(p0) extr.u lookup = sig_r, 58, 5
+}
+{ .mlx
+ nop.m 999
+(p0) movl Create_B = 0x8200000000000000 ;;
+}
+{ .mfi
+(p0) addl table_ptr1 = @ltoff(TANL_BASE_CONSTANTS), gp
+ nop.f 999
+(p0) dep Create_B = lookup, Create_B, 58, 5
+}
+;;
+
+
+//
+// Get [i_1] - lsb of N_fix_gr alone.
+// Pos_r = abs (r)
+//
+
+
+{ .mmi
+(p0) ld8 table_ptr1 = [table_ptr1]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mmi
+ nop.m 999
+(p0) setf.sig B = Create_B
+//
+// Set table_ptr1 and table_ptr2 to base address of
+// constant table.
+//
+(p0) add table_ptr1 = 480, table_ptr1 ;;
+}
+{ .mmb
+ nop.m 999
+//
+// Is i_1 or i_0 == 0 ?
+// Create the constant 1 00000 1000000000000000000000...
+//
+(p0) ldfe P2_1 = [table_ptr1], 16
+ nop.b 999
+}
+{ .mmi
+ nop.m 999 ;;
+(p0) getf.exp exp_r = Pos_r
+ nop.i 999
+}
+//
+// Get r's exponent
+// Get r's significand
+//
+{ .mmi
+(p0) ldfe P2_2 = [table_ptr1], 16 ;;
+//
+// Get the 5 bits or r for the lookup. 1.xxxxx ....
+// from sig_r.
+// Grab lsb of exp of B
+//
+(p0) ldfe P2_3 = [table_ptr1], 16
+ nop.i 999 ;;
+}
+{ .mii
+ nop.m 999
+(p0) andcm table_offset = 0x0001, exp_r ;;
+(p0) shl table_offset = table_offset, 9 ;;
+}
+{ .mii
+ nop.m 999
+//
+// Deposit 0 00000 1000000000000000000000... on
+// 1 xxxxx yyyyyyyyyyyyyyyyyyyyyy...,
+// getting rid of the ys.
+// Is B = 2** -2 or B= 2** -1? If 2**-1, then
+// we want an offset of 512 for table addressing.
+//
+(p0) shladd table_offset = lookup, 4, table_offset ;;
+//
+// B = ........ 1xxxxx 1000000000000000000...
+//
+(p0) add table_ptr1 = table_ptr1, table_offset ;;
+}
+{ .mmb
+ nop.m 999
+//
+// B = ........ 1xxxxx 1000000000000000000...
+// Convert B so it has the same exponent as Pos_r
+//
+(p0) ldfd T_hi = [table_ptr1], 8
+ nop.b 999 ;;
+}
+
+
+
+//
+// x = |r| - B
+// Load T_hi.
+// Load C_hi.
+//
+
+{ .mmf
+(p0) addl table_ptr2 = @ltoff(TANL_BASE_CONSTANTS), gp
+(p0) ldfs T_lo = [table_ptr1]
+(p0) fmerge.se B = Pos_r, B
+}
+;;
+
+
+{ .mmi
+(p0) ld8 table_ptr2 = [table_ptr2]
+ nop.m 999
+ nop.i 999
+}
+;;
+
+
+{ .mii
+(p0) add table_ptr2 = 1360, table_ptr2
+ nop.i 999 ;;
+(p0) add table_ptr2 = table_ptr2, table_offset ;;
+}
+{ .mfi
+(p0) ldfd C_hi = [table_ptr2], 8
+(p0) fsub.s1 x = Pos_r, B
+ nop.i 999 ;;
+}
+{ .mii
+(p0) ldfs C_lo = [table_ptr2],255
+ nop.i 999 ;;
+//
+// xsq = x * x
+// N even: Tx = T_hi * x
+// Load T_lo.
+// Load C_lo - increment pointer to get SC_inv
+// - cant get all the way, do an add later.
+//
+(p0) add table_ptr2 = 569, table_ptr2 ;;
+}
+//
+// N even: Tx1 = Tx + 1
+// N odd: Cx1 = 1 - Cx
+//
+{ .mfi
+(p0) ldfe SC_inv = [table_ptr2], 0
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 xsq = x, x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fmpy.s1 Tx = T_hi, x
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fmpy.s1 Cx = C_hi, x
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: Cx = C_hi * x
+//
+(p0) fma.s1 P = P2_3, xsq, P2_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: P = P2_3 + P2_2 * xsq
+//
+(p11) fadd.s1 Tx1 = Tx, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: D = C_hi - tanx
+// N odd: D = T_hi + tanx
+//
+(p11) fmpy.s1 CORR = SC_inv, T_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 Sx = SC_inv, x
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fmpy.s1 CORR = SC_inv, C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 V_hi = f1, Cx
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fma.s1 P = P, xsq, P2_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: P = P2_1 + P * xsq
+//
+(p11) fma.s1 V_hi = Tx, Tx1, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: Result = sgn_r * tail + T_hi (user rounding mode for C1)
+// N odd: Result = sgn_r * tail + C_hi (user rounding mode for C1)
+//
+(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 CORR = CORR, c
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_hi = Cx,V_hi,f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: V_hi = Tx * Tx1 + 1
+// N odd: Cx1 = 1 - Cx * Cx1
+//
+(p0) fmpy.s1 P = P, xsq
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: P = P * xsq
+//
+(p11) fmpy.s1 V_hi = V_hi, T_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = P * tail + V_lo
+//
+(p11) fmpy.s1 T_hi = sgn_r, T_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p0) fmpy.s1 CORR = CORR, sgn_r
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p12) fmpy.s1 V_hi = V_hi,C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: V_hi = T_hi * V_hi
+// N odd: V_hi = C_hi * V_hi
+//
+(p0) fma.s1 tanx = P, x, x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fnmpy.s1 C_hi = sgn_r, C_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: V_lo = 1 - V_hi + C_hi
+// N odd: V_lo = 1 - V_hi + T_hi
+//
+(p11) fadd.s1 CORR = CORR, T_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fsub.s1 CORR = CORR, C_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tanx = x + x * P
+// N even and odd: Sx = SC_inv * x
+//
+(p11) fsub.s1 D = C_hi, tanx
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fadd.s1 D = T_hi, tanx
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N odd: CORR = SC_inv * C_hi
+// N even: CORR = SC_inv * T_hi
+//
+(p0) fnma.s1 D = V_hi, D, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: D = 1 - V_hi * D
+// N even and odd: CORR = CORR * c
+//
+(p0) fma.s1 V_hi = V_hi, D, V_hi
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: V_hi = V_hi + V_hi * D
+// N even and odd: CORR = sgn_r * CORR
+//
+(p11) fnma.s1 V_lo = V_hi, C_hi, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_lo = V_hi, T_hi, f1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: CORR = COOR + T_lo
+// N odd: CORR = CORR - C_lo
+//
+(p11) fma.s1 V_lo = tanx, V_hi, V_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_lo = tanx, V_hi, V_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: V_lo = V_lo + V_hi * tanx
+// N odd: V_lo = V_lo - V_hi * tanx
+//
+(p11) fnma.s1 V_lo = C_lo, V_hi, V_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 V_lo = T_lo, V_hi, V_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: V_lo = V_lo - V_hi * C_lo
+// N odd: V_lo = V_lo - V_hi * T_lo
+//
+(p0) fmpy.s1 V_lo = V_hi, V_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: V_lo = V_lo * V_hi
+//
+(p0) fadd.s1 tail = V_hi, V_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = V_hi + V_lo
+//
+(p0) fma.s1 tail = tail, P, V_lo
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even: T_hi = sgn_r * T_hi
+// N odd : C_hi = -sgn_r * C_hi
+//
+(p0) fma.s1 tail = tail, Sx, CORR
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even and odd: tail = Sx * tail + CORR
+//
+(p0) fma.s1 tail = V_hi, Sx, tail
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+//
+// N even an odd: tail = Sx * V_hi + tail
+//
+(p11) fma.s0 Result = sgn_r, tail, T_hi
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p12) fma.s0 Result = sgn_r, tail, C_hi
+(p0) br.ret.sptk b0 ;;
+}
+
+L(TANL_SPECIAL):
+{ .mfb
+ nop.m 999
+(p0) fmpy.s0 Arg = Arg, f0
+(p0) br.ret.sptk b0 ;;
+}
+//
+// Code for NaNs, Unsupporteds, Infs, or +/- zero ?
+// Invalid raised for Infs and SNaNs.
+//
+
+.endp tanl
+ASM_SIZE_DIRECTIVE(tanl)
+
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+//
+// Special Code to handle very large argument case.
+// Call int pi_by_2_reduce(&x,&r,&c)
+// for |arguments| >= 2**63
+// (Arg or x) is in f8
+// Stack slots hold r and c as double-extended (stfe/ldfe, 16 bytes each)
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+
+.proc __libm_callout
+__libm_callout:
+L(TANL_ARG_TOO_LARGE):
+.prologue
+{ .mfi
+ add r50=-32,sp // r50 = address of parameter r (incoming sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate a 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [r50] = f0,16 // Clear parameter r on stack; r50 advances to c
+ add r49 = 16,sp // r49 = address of parameter x (new sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [r50] = f0,-16 // Clear parameter c on stack; r50 steps back to r
+ nop.i 0
+ nop.b 0
+}
+{ .mib
+ stfe [r49] = Arg // Store Parameter x on stack
+ nop.i 0
+(p0) br.call.sptk b0=__libm_pi_by_2_reduce# ;;
+};;
+//
+// Back from the argument reduction call: get Arg off the stack
+//
+{ .mmi
+(p0) ldfe Arg =[r49],16
+//
+// Load 2^-2 through table_ptr2
+//
+(p0) ldfs TWO_TO_NEG2 = [table_ptr2],4
+// N_fix_gr = r8 as left by the call (presumably its int result - confirm)
+// Next bundle: get r off stack - hi order part
+// Bundle after that: get c off stack - lo order part
+(p0) mov N_fix_gr = r8 ;;
+}
+{ .mmb
+(p0) ldfe r =[r50],16
+(p0) ldfs NEGTWO_TO_NEG2 = [table_ptr2],4
+ nop.b 999 ;;
+}
+{ .mfi
+(p0) ldfe c =[r50],-32
+ nop.f 999
+ nop.i 999 ;;
+}
+{ .mfi
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+//
+// Is |r| < 2**(-2)? p6 = (r < TWO_TO_NEG2), narrowed by r > NEGTWO_TO_NEG2 below
+//
+(p0) fcmp.lt.unc.s1 p6, p0 = r, TWO_TO_NEG2
+mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mfi
+ mov gp = GR_SAVE_GP // Restore gp
+(p6) fcmp.gt.unc.s1 p6, p0 = r, NEGTWO_TO_NEG2
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+};;
+{ .mbb
+ nop.m 999
+(p6) br.cond.spnt L(TANL_SMALL_R)
+(p0) br.cond.sptk L(TANL_NORMAL_R) ;;
+}
+
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
+
+.type __libm_pi_by_2_reduce#,@function
+.global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_trunc.S b/sysdeps/ia64/fpu/s_trunc.S
new file mode 100644
index 0000000..976ddf1
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_trunc.S
@@ -0,0 +1,188 @@
+.file "trunc.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+.align 32
+.global trunc#
+
+.section .text
+.proc trunc#
+.align 32
+
+// History
+//==============================================================
+// 7/7/00: Created
+//==============================================================
+
+// API
+//==============================================================
+// double trunc(double x)
+//==============================================================
+
+#include "libm_support.h"
+
+// general registers (integer temporaries, all written before they are read):
+TRUNC_GR_FFFF = r14
+TRUNC_GR_signexp = r15
+TRUNC_GR_exponent = r16
+TRUNC_GR_expmask = r17
+TRUNC_GR_bigexp = r18
+
+// floating-point registers:
+// f8 (x on entry, result on return), f9, f11, f12
+
+// predicate registers used:
+// p6, p7, p8, p9, p10, p11
+
+// Overview of operation
+//==============================================================
+// double trunc(double x)
+// Return an integer value (represented as a double) less than or
+// equal to x in magnitude.
+// This is x rounded toward zero to an integral value.
+//==============================================================
+
+// double_extended
+// if the exponent is >= 0x1003e => 0x3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double (the case handled here: bigexp = 0x10033)
+// if the exponent is >= 0x10033 => 0x34(true) = 52(decimal)
+// 0x34 + 0x3ff = 0x433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 0x10016 => 0x17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+trunc:
+
+{ .mfi
+ getf.exp TRUNC_GR_signexp = f8
+ fcvt.fx.trunc.s1 f9 = f8
+ addl TRUNC_GR_bigexp = 0x10033, r0
+}
+{ .mfi
+ mov TRUNC_GR_FFFF = 0x0FFFF
+ fnorm.d f11 = f8
+ mov TRUNC_GR_expmask = 0x1FFFF
+};;
+// The two bundles above: get the sign and exponent of x,
+// convert x to a truncated integer in the significand of f9,
+// normalize x into f11 - this will raise invalid on SNaNs, the
+// denormal operand flag - and possibly a spurious U flag - and set up
+// bigexp, 0xFFFF and the exponent only mask (will exclude sign bit)
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p8 = f8, 0x0b
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p9,p0 = f8,f0
+ nop.i 0
+};;
+// Above: fclass sets p7 if x is unnorm, p8 otherwise; fcmp sets p9 if x == 0
+{ .mmi
+ and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
+(p8) cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+(p8) cmp.ne.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
+};;
+// Above: get the exponent of x (signexp with the sign bit masked off)
+// If not unnorm: p10 if exponent >= bigexp (already an integer), else p11
+// If not unnorm: p6 if masking removed the sign bit, that is x < 0
+{ .mmi
+(p9) cmp.eq.andcm p10,p11 = r0, r0
+(p6) cmp.lt.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
+ nop.i 0
+};;
+// x == 0 (p9): turn off p10, p11. If -1 < x < 0, p6 stays set; then turn off p10, p11, result -0.0
+{ .mfb
+(p6) cmp.eq.andcm p10,p11 = r0, r0
+(p6) fmerge.s f8 = f8, f0
+ nop.b 0
+};;
+// If not a unnorm, p10 is set if x already is a big int, nan, or inf,
+// p11 if x must be converted back from f9; never both (see .pred.rel).
+.pred.rel "mutex",p10,p11
+{ .mfb
+ nop.m 0
+(p11) fcvt.xf f8 = f9
+ nop.b 0
+}
+{ .mfb
+ nop.m 0
+(p10) fma.d.s1 f8 = f11,f1,f0
+(p8) br.ret.sptk b0
+};;
+// If not a unnorm and not a big int, nan, or +/-inf convert significand
+// back to f8.
+// If not a unnorm and a big int, nan, or +/-inf, return fnorm'd x
+// If not a unnorm, Return
+// If unnorm, get the exponent again - perhaps it wasn't a denorm.
+{ .mfb
+(p7) getf.exp TRUNC_GR_signexp = f11
+(p7) fcvt.fx.trunc.s1 f12 = f11
+ nop.b 0
+};;
+{ .mfb
+ and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
+ fcmp.lt.unc.s1 p9,p0 = f8,f0
+ nop.b 0
+};;
+{ .mfb
+ cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+ nop.f 0
+ nop.b 0
+};;
+// If a unnorm, check to see if value is already a big int (p10), else p11.
+{ .mfb
+ nop.m 0
+(p11) fcvt.xf f8 = f12
+ nop.b 0
+}
+{ .mfi
+ nop.m 0
+(p10) fma.d.s1 f8 = f11,f1,f0
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+(p9) fmerge.ns f8 = f1,f8
+ br.ret.sptk b0
+};;
+// If a big int, return fnorm'd x. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
+// Make sure the result is negative if it should be (p9: x < 0) - that is
+// negative(denormal) -> -0.
+.endp trunc
+ASM_SIZE_DIRECTIVE(trunc)
diff --git a/sysdeps/ia64/fpu/s_truncf.S b/sysdeps/ia64/fpu/s_truncf.S
new file mode 100644
index 0000000..1036405
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_truncf.S
@@ -0,0 +1,188 @@
+.file "truncf.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+.align 32
+.global truncf#
+
+.section .text
+.proc truncf#
+.align 32
+
+// History
+//==============================================================
+// 7/7/00: Created
+//==============================================================
+
+// API
+//==============================================================
+// float truncf(float x)
+//==============================================================
+
+#include "libm_support.h"
+
+// general registers (integer temporaries, all written before they are read):
+TRUNC_GR_FFFF = r14
+TRUNC_GR_signexp = r15
+TRUNC_GR_exponent = r16
+TRUNC_GR_expmask = r17
+TRUNC_GR_bigexp = r18
+
+// floating-point registers:
+// f8 (x on entry, result on return), f9, f11, f12
+
+// predicate registers used:
+// p6, p7, p8, p9, p10, p11
+
+// Overview of operation
+//==============================================================
+// float truncf(float x)
+// Return an integer value (represented as a float) less than or
+// equal to x in magnitude.
+// This is x rounded toward zero to an integral value.
+//==============================================================
+
+// double_extended
+// if the exponent is >= 0x1003e => 0x3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 0x10033 => 0x34(true) = 52(decimal)
+// 0x34 + 0x3ff = 0x433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single (the case handled here: bigexp = 0x10016)
+// if the exponent is >= 0x10016 => 0x17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+truncf:
+
+{ .mfi
+ getf.exp TRUNC_GR_signexp = f8
+ fcvt.fx.trunc.s1 f9 = f8
+ addl TRUNC_GR_bigexp = 0x10016, r0
+}
+{ .mfi
+ mov TRUNC_GR_FFFF = 0x0FFFF
+ fnorm.s f11 = f8
+ mov TRUNC_GR_expmask = 0x1FFFF
+};;
+// The two bundles above: get the sign and exponent of x,
+// convert x to a truncated integer in the significand of f9,
+// normalize x into f11 - this will raise invalid on SNaNs, the
+// denormal operand flag - and possibly a spurious U flag - and set up
+// bigexp, 0xFFFF and the exponent only mask (will exclude sign bit)
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p8 = f8, 0x0b
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p9,p0 = f8,f0
+ nop.i 0
+};;
+// Above: fclass sets p7 if x is unnorm, p8 otherwise; fcmp sets p9 if x == 0
+{ .mmi
+ and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
+(p8) cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+(p8) cmp.ne.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
+};;
+// Above: get the exponent of x (signexp with the sign bit masked off)
+// If not unnorm: p10 if exponent >= bigexp (already an integer), else p11
+// If not unnorm: p6 if masking removed the sign bit, that is x < 0
+{ .mmi
+(p9) cmp.eq.andcm p10,p11 = r0, r0
+(p6) cmp.lt.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
+ nop.i 0
+};;
+// x == 0 (p9): turn off p10, p11. If -1 < x < 0, p6 stays set; then turn off p10, p11, result -0.0
+{ .mfb
+(p6) cmp.eq.andcm p10,p11 = r0, r0
+(p6) fmerge.s f8 = f8, f0
+ nop.b 0
+};;
+// If not a unnorm, p10 is set if x already is a big int, nan, or inf,
+// p11 if x must be converted back from f9; never both (see .pred.rel).
+.pred.rel "mutex",p10,p11
+{ .mfb
+ nop.m 0
+(p11) fcvt.xf f8 = f9
+ nop.b 0
+}
+{ .mfb
+ nop.m 0
+(p10) fma.s.s1 f8 = f11,f1,f0
+(p8) br.ret.sptk b0
+};;
+// If not a unnorm and not a big int, nan, or +/-inf convert significand
+// back to f8.
+// If not a unnorm and a big int, nan, or +/-inf, return fnorm'd x
+// If not a unnorm, Return
+// If unnorm, get the exponent again - perhaps it wasn't a denorm.
+{ .mfb
+(p7) getf.exp TRUNC_GR_signexp = f11
+(p7) fcvt.fx.trunc.s1 f12 = f11
+ nop.b 0
+};;
+{ .mfb
+ and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
+ fcmp.lt.unc.s1 p9,p0 = f8,f0
+ nop.b 0
+};;
+{ .mfb
+ cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+ nop.f 0
+ nop.b 0
+};;
+// If a unnorm, check to see if value is already a big int (p10), else p11.
+{ .mfb
+ nop.m 0
+(p11) fcvt.xf f8 = f12
+ nop.b 0
+}
+{ .mfi
+ nop.m 0
+(p10) fma.s.s1 f8 = f11,f1,f0
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+(p9) fmerge.ns f8 = f1,f8
+ br.ret.sptk b0
+};;
+// If a big int, return fnorm'd x. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
+// Make sure the result is negative if it should be (p9: x < 0) - that is
+// negative(denormal) -> -0.
+.endp truncf
+ASM_SIZE_DIRECTIVE(truncf)
diff --git a/sysdeps/ia64/fpu/s_truncl.S b/sysdeps/ia64/fpu/s_truncl.S
new file mode 100644
index 0000000..aca64b9
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_truncl.S
@@ -0,0 +1,188 @@
+.file "truncl.s"
+
+// Copyright (c) 2000, 2001, Intel Corporation
+// All rights reserved.
+//
+// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
+//
+// WARRANTY DISCLAIMER
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://developer.intel.com/opensource.
+//
+.align 32
+.global truncl#
+
+.section .text
+.proc truncl#
+.align 32
+
+// History
+//==============================================================
+// 7/7/00: Created
+//==============================================================
+
+// API
+//==============================================================
+// long double truncl(long double x)
+//==============================================================
+
+#include "libm_support.h"
+
+// general registers used:
+TRUNC_GR_FFFF = r14
+TRUNC_GR_signexp = r15
+TRUNC_GR_exponent = r16
+TRUNC_GR_expmask = r17
+TRUNC_GR_bigexp = r18
+
+// floating-point registers:
+// f8, f9, f11, f12
+
+// predicate registers used:
+// p6, p7, p8, p9, p10, p11
+
+// Overview of operation
+//==============================================================
+// long double truncl(long double x)
+// Return an integer value (represented as a long double) less than or
+// equal to x in magnitude.
+// This is x rounded toward zero to an integral value.
+//==============================================================
+
+// double_extended
+// if the exponent is >= 0x1003e => 0x3F(true) = 63(decimal)
+// we have a significand of 64 bits 1.63-bits.
+// If we multiply by 2^63, we no longer have a fractional part
+// So input is an integer value already.
+
+// double
+// if the exponent is >= 0x10033 => 0x34(true) = 52(decimal)
+// 0x34 + 0x3ff = 0x433
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
+// So input is an integer value already.
+
+// single
+// if the exponent is >= 0x10016 => 0x17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
+// So input is an integer value already.
+
+truncl:
+// x is read from f8 and the result is written back to f8.
+// Get the sign and exponent of x.
+// Convert x to a truncated integer in the significand of f9.
+// Load the exponent threshold 0x1003e and the constant 0xFFFF (|x| < 1 test).
+// Normalize x into f11 - this will raise invalid on SNaNs, the
+// denormal operand flag - and possibly a spurious U flag.
+// Load the exponent-only mask (will exclude the sign bit).
+{ .mfi
+ getf.exp TRUNC_GR_signexp = f8
+ fcvt.fx.trunc.s1 f9 = f8
+ addl TRUNC_GR_bigexp = 0x1003e, r0
+}
+{ .mfi
+ mov TRUNC_GR_FFFF = 0x0FFFF
+ fnorm f11 = f8
+ mov TRUNC_GR_expmask = 0x1FFFF
+};;
+// fclass sets p7 if x is an unnorm, p8 otherwise.
+// fcmp sets p9 if x == 0.
+{ .mfi
+ nop.m 0
+ fclass.m p7,p8 = f8, 0x0b
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p9,p0 = f8,f0
+ nop.i 0
+};;
+// Mask off the sign bit to get the exponent of x.
+// If not an unnorm, set p10 if the exponent is such that x is already an
+// integer (else p11), and set p6 if the sign bit is set, i.e. x < 0.
+{ .mmi
+ and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
+(p8) cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+(p8) cmp.ne.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
+};;
+// If x == 0, turn off p10 and p11 so neither conversion below writes f8.
+// If x < 0, keep p6 set only when the exponent is below 0xFFFF (|x| < 1).
+{ .mmi
+(p9) cmp.eq.andcm p10,p11 = r0, r0
+(p6) cmp.lt.unc p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
+ nop.i 0
+};;
+// If -1 < x < 0, turn off p10 and p11, and set result to -0.0
+{ .mfb
+(p6) cmp.eq.andcm p10,p11 = r0, r0
+(p6) fmerge.s f8 = f8, f0
+ nop.b 0
+};;
+// If not an unnorm and not a big int, nan, or +/-inf (p11), convert the
+// significand of f9 back to f8.  If not an unnorm and a big int, nan, or
+// +/-inf (p10), return the fnorm'd x.  If not an unnorm (p8), return.
+.pred.rel "mutex",p10,p11
+{ .mfb
+ nop.m 0
+(p11) fcvt.xf f8 = f9
+ nop.b 0
+}
+{ .mfb
+ nop.m 0
+(p10) fma.s1 f8 = f11,f1,f0
+(p8) br.ret.sptk b0
+};;
+// If unnorm, get the exponent again from the normalized x in f11 - perhaps
+// it wasn't a denorm - and truncate f11 to an integer in f12.
+// Set p9 if f8 < 0, then set p10 if the value is already a big int.
+{ .mfb
+(p7) getf.exp TRUNC_GR_signexp = f11
+(p7) fcvt.fx.trunc.s1 f12 = f11
+ nop.b 0
+};;
+{ .mfb
+ and TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
+ fcmp.lt.unc.s1 p9,p0 = f8,f0
+ nop.b 0
+};;
+{ .mfb
+ cmp.ge.unc p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+ nop.f 0
+ nop.b 0
+};;
+// If so return the fnorm'd x. Otherwise, return fcvt.xf(fcvt.fx.trunc(x)).
+// Make the result negative if x < 0 - that is negative(denormal) -> -0.
+{ .mfb
+ nop.m 0
+(p11) fcvt.xf f8 = f12
+ nop.b 0
+}
+{ .mfi
+ nop.m 0
+(p10) fma.s1 f8 = f11,f1,f0
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+(p9) fmerge.ns f8 = f1,f8
+ br.ret.sptk b0
+};;
+.endp truncl
+ASM_SIZE_DIRECTIVE(truncl)
diff --git a/sysdeps/ia64/fpu/w_acos.c b/sysdeps/ia64/fpu/w_acos.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_acos.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_acosf.c b/sysdeps/ia64/fpu/w_acosf.c
new file mode 100644
index 0000000..1cc8931
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_acosf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_acosl.c b/sysdeps/ia64/fpu/w_acosl.c
new file mode 100644
index 0000000..1cc8931
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_acosl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_asin.c b/sysdeps/ia64/fpu/w_asin.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_asin.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_asinf.c b/sysdeps/ia64/fpu/w_asinf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_asinf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_asinl.c b/sysdeps/ia64/fpu/w_asinl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_asinl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_atan2.c b/sysdeps/ia64/fpu/w_atan2.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_atan2.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_atan2f.c b/sysdeps/ia64/fpu/w_atan2f.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_atan2f.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_atan2l.c b/sysdeps/ia64/fpu/w_atan2l.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_atan2l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_cosh.c b/sysdeps/ia64/fpu/w_cosh.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_cosh.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_coshf.c b/sysdeps/ia64/fpu/w_coshf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_coshf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_coshl.c b/sysdeps/ia64/fpu/w_coshl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_coshl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_exp.c b/sysdeps/ia64/fpu/w_exp.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_exp.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_expf.c b/sysdeps/ia64/fpu/w_expf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_expf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_fmod.c b/sysdeps/ia64/fpu/w_fmod.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_fmod.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_fmodf.c b/sysdeps/ia64/fpu/w_fmodf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_fmodf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_fmodl.c b/sysdeps/ia64/fpu/w_fmodl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_fmodl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_hypot.c b/sysdeps/ia64/fpu/w_hypot.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_hypot.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_hypotf.c b/sysdeps/ia64/fpu/w_hypotf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_hypotf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_hypotl.c b/sysdeps/ia64/fpu/w_hypotl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_hypotl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_log.c b/sysdeps/ia64/fpu/w_log.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_log.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_log10.c b/sysdeps/ia64/fpu/w_log10.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_log10.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_log10f.c b/sysdeps/ia64/fpu/w_log10f.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_log10f.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_log10l.c b/sysdeps/ia64/fpu/w_log10l.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_log10l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_logf.c b/sysdeps/ia64/fpu/w_logf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_logf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_logl.c b/sysdeps/ia64/fpu/w_logl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_logl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_pow.c b/sysdeps/ia64/fpu/w_pow.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_pow.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_powf.c b/sysdeps/ia64/fpu/w_powf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_powf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_powl.c b/sysdeps/ia64/fpu/w_powl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_powl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_remainder.c b/sysdeps/ia64/fpu/w_remainder.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_remainder.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_remainderf.c b/sysdeps/ia64/fpu/w_remainderf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_remainderf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_remainderl.c b/sysdeps/ia64/fpu/w_remainderl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_remainderl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_scalb.c b/sysdeps/ia64/fpu/w_scalb.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_scalb.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_scalbf.c b/sysdeps/ia64/fpu/w_scalbf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_scalbf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_scalbl.c b/sysdeps/ia64/fpu/w_scalbl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_scalbl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_sqrt.c b/sysdeps/ia64/fpu/w_sqrt.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_sqrt.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_sqrtf.c b/sysdeps/ia64/fpu/w_sqrtf.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_sqrtf.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/w_sqrtl.c b/sysdeps/ia64/fpu/w_sqrtl.c
new file mode 100644
index 0000000..41254ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/w_sqrtl.c
@@ -0,0 +1 @@
+/* Not needed. */