diff options
Diffstat (limited to 'sysdeps/ia64/fpu/e_log2f.S')
-rw-r--r-- | sysdeps/ia64/fpu/e_log2f.S | 551 |
1 files changed, 551 insertions, 0 deletions
diff --git a/sysdeps/ia64/fpu/e_log2f.S b/sysdeps/ia64/fpu/e_log2f.S new file mode 100644 index 0000000..17d710a --- /dev/null +++ b/sysdeps/ia64/fpu/e_log2f.S @@ -0,0 +1,551 @@ +.file "log2f.s" + + +// Copyright (c) 2000 - 2003, Intel Corporation +// All rights reserved. +// +// Contributed 2000 by the Intel Numerics Group, Intel Corporation +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote +// products derived from this software without specific prior written +// permission. + +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Intel Corporation is the author of this code, and requests that all +// problem reports or change requests be submitted to it directly at +// http://www.intel.com/software/products/opensource/libraries/num.htm. +// +// History +//============================================================== +// 09/11/00 Initial version +// 05/20/02 Cleaned up namespace and sf0 syntax +// 02/10/03 Reordered header: .section, .global, .proc, .align +// +// API +//============================================================== +// float log2f(float) +// +// Overview of operation +//============================================================== +// Background +// +// Implementation +// +// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52 +// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8 (table index) +// j=0 if f<128; j=1 if f>=128 +// T is a table that stores log2(1/y) (in entries 1..255) rounded to +// double extended precision; f is used as an index; T[255]=0 +// +// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m), +// and 0 is used instead of T[0] +// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9}) +// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used +// for m=2(1-r'), 0<=r'<2^{-9}) +// +// log2f(x) is approximated as +// (l-j) + T[f] + (c1*r+c2*r^2+...+c6*r^6), if f>0 +// + + +// Special values +//============================================================== +// log2f(0)=-inf, raises Divide by Zero +// log2f(+inf)=inf +// log2f(x)=NaN, raises Invalid if x<0 +// + + +// Registers used +//============================================================== +// f6-f14 +// r2-r3, r23-r30 +// p6,p7,p8,p12 +// + + +GR_SAVE_B0 = r33 +GR_SAVE_PFS = r34 +GR_SAVE_GP = r35 // This reg. can safely be used +GR_SAVE_SP = r36 + +GR_Parameter_X = r37 +GR_Parameter_Y = r38 +GR_Parameter_RESULT = r39 +GR_Parameter_TAG = r40 + +FR_X = f10 +FR_Y = f1 +FR_RESULT = f8 + + + + +// Data tables +//============================================================== + +RODATA + +.align 16 + +LOCAL_OBJECT_START(poly_coeffs) + +data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe //C_3 and C_4 +data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1 +data8 0xb8aa3b295c17f0bc, 0x0000bffe // C_2 +LOCAL_OBJECT_END(poly_coeffs) + + +LOCAL_OBJECT_START(T_table) + +data8 0x3f671b0ea42e5fda, 0x3f815cfe8eaec830 +data8 0x3f8cfee70c5ce5dc, 0x3f94564a62192834 +data8 0x3f997723ace35766, 0x3f9f5923c69b54a1 +data8 0x3fa2a094a085d693, 0x3fa538941776b01e +data8 0x3fa8324c9b914bc7, 0x3faacf54ce07d7e9 +data8 0x3fadced958dadc12, 0x3fb0387efbca869e +data8 0x3fb18ac6067479c0, 0x3fb30edd3e13530d +data8 0x3fb463c15936464e, 0x3fb5b9e13c3fa21d +data8 0x3fb7113f3259e07a, 0x3fb869dd8d1b2035 +data8 0x3fb9c3bea49d3214, 0x3fbb1ee4d7961701 +data8 0x3fbc7b528b70f1c5, 0x3fbdd90a2c676ed4 +data8 0x3fbf05d4976c2028, 0x3fc032fbbaee6d65 +data8 0x3fc0e3b5a9f3284a, 0x3fc195195c7d125b +data8 0x3fc22dadc2ab3497, 0x3fc2e050231df57d +data8 0x3fc379f79c2b255b, 0x3fc42ddd2ba1b4a9 +data8 0x3fc4c89b9e6807f5, 0x3fc563dc29ffacb2 +data8 0x3fc619a25f5d798d, 0x3fc6b5ffbf367644 +data8 0x3fc752e1f660f8d6, 0x3fc7f049e753e7cf +data8 0x3fc8a8980abfbd32, 0x3fc94724cca657be +data8 0x3fc9e63a24971f46, 0x3fca85d8feb202f7 +data8 0x3fcb2602497d5346, 0x3fcbc6b6f5ee1c9b +data8 0x3fcc67f7f770a67e, 0x3fcceec4b2234fba +data8 0x3fcd91097ad13982, 0x3fce33dd57f3d335 +data8 0x3fced74146bc7b10, 0x3fcf7b3646fef683 +data8 0x3fd00223a943dc19, 0x3fd054a474bf0eb7 +data8 0x3fd0999d9b9259a1, 0x3fd0eca66d3b2581 +data8 0x3fd13ffa2e85b475, 0x3fd185a444fa0a7b +data8 0x3fd1cb8312f27eff, 0x3fd21fa1441ce5e8 +data8 0x3fd265f526e603cb, 0x3fd2baa0c34be1ec +data8 0x3fd3016b45de21ce, 0x3fd3486c38aa29a8 +data8 0x3fd38fa3efaa8262, 0x3fd3e562c0816a02 +data8 0x3fd42d141f53b646, 0x3fd474fd543f222c +data8 0x3fd4bd1eb680e548, 0x3fd505789e234bd1 +data8 0x3fd54e0b64003b70, 0x3fd596d761c3c1f0 +data8 0x3fd5dfdcf1eeae0e, 0x3fd6291c6fd9329c +data8 0x3fd6729637b59418, 0x3fd6bc4aa692e0fd +data8 0x3fd7063a1a5fb4f2, 0x3fd75064f1ed0715 +data8 0x3fd79acb8cf10390, 0x3fd7d67c1e43ae5c +data8 0x3fd8214f4068afa7, 0x3fd86c5f36dea3dc +data8 0x3fd8b7ac64dd7f9d, 0x3fd8f4167a0c6f92 +data8 0x3fd93fd2d5e1bf1d, 0x3fd98bcd84296946 +data8 0x3fd9c8c333e6e9a5, 0x3fda152f142981b4 +data8 0x3fda527fd95fd8ff, 0x3fda9f5e3edeb9e6 +data8 0x3fdadd0b2b5755a7, 0x3fdb2a5d6f51ff83 +data8 0x3fdb686799b00be3, 0x3fdbb62f1b887cd8 +data8 0x3fdbf4979f666668, 0x3fdc332a6e8399d4 +data8 0x3fdc819dc2d45fe4, 0x3fdcc0908e19b7bd +data8 0x3fdcffae611ad12b, 0x3fdd3ef776d43ff4 +data8 0x3fdd8e5002710128, 0x3fddcdfb486cb9a1 +data8 0x3fde0dd294245fe4, 0x3fde4dd622a28840 +data8 0x3fde8e06317114f0, 0x3fdece62fe9a9915 +data8 0x3fdf1f164a15389a, 0x3fdf5fd8a9063e35 +data8 0x3fdfa0c8937e7d5d, 0x3fdfe1e649bb6335 +data8 0x3fe011990641535a, 0x3fe032560e91e59e +data8 0x3fe0532a5ebcd44a, 0x3fe0741617f5fc28 +data8 0x3fe08cd653f38839, 0x3fe0adeb55c1103b +data8 0x3fe0cf181d5d1dd0, 0x3fe0f05ccd0aced7 +data8 0x3fe111b9875788ab, 0x3fe1332e6f1bcf73 +data8 0x3fe154bba77c2088, 0x3fe16df59bfa06c1 +data8 0x3fe18fadb6e2d3c2, 0x3fe1b17e849adc26 +data8 0x3fe1caeb6a0de814, 0x3fe1ece7c830eec9 +data8 0x3fe20efd3dae01df, 0x3fe2289de375d901 +data8 0x3fe24adf9b6a6fe0, 0x3fe26d3ad1aebcfc +data8 0x3fe287100c2771f4, 0x3fe2a9983b3c1b28 +data8 0xbfda78e146f7bef4, 0xbfda33760a7f6051 +data8 0xbfd9ff43476fb5f7, 0xbfd9b97c3c4eec8f +data8 0xbfd98504431717fc, 0xbfd93ee07535f967 +data8 0xbfd90a228d5712b2, 0xbfd8c3a104cb24f5 +data8 0xbfd88e9c72e0b226, 0xbfd847bc33d8618e +data8 0xbfd812703988bb69, 0xbfd7dd0569c04bff +data8 0xbfd7959c202292f1, 0xbfd75fe8d2c5d48f +data8 0xbfd72a1637cbc183, 0xbfd6e221cd9d0cde +data8 0xbfd6ac059985503b, 0xbfd675c99ce81f92 +data8 0xbfd63f6db2590482, 0xbfd5f6c138136489 +data8 0xbfd5c01a39fbd688, 0xbfd58952cf519193 +data8 0xbfd5526ad18493ce, 0xbfd51b6219bfe6ea +data8 0xbfd4d1cdf8b4846f, 0xbfd49a784bcd1b8b +data8 0xbfd4630161832547, 0xbfd42b6911cf5465 +data8 0xbfd3f3af3461e1c4, 0xbfd3bbd3a0a1dcfb +data8 0xbfd383d62dac7ae7, 0xbfd34bb6b2546218 +data8 0xbfd313750520f520, 0xbfd2db10fc4d9aaf +data8 0xbfd2a28a6dc90387, 0xbfd269e12f346e2c +data8 0xbfd2311515e2e855, 0xbfd1f825f6d88e13 +data8 0xbfd1bf13a6c9c69f, 0xbfd185ddfa1a7ed0 +data8 0xbfd14c84c4dd6128, 0xbfd11307dad30b76 +data8 0xbfd0d9670f6941fe, 0xbfd09fa235ba2020 +data8 0xbfd0790adbb03009, 0xbfd03f09858c55fb +data8 0xbfd004e3a7c97cbd, 0xbfcf9532288fcf69 +data8 0xbfcf205339208f27, 0xbfceab2a23a5b83e +data8 0xbfce5ce55fdd37a5, 0xbfcde73fe3b1480f +data8 0xbfcd714f44623927, 0xbfccfb1321b8c400 +data8 0xbfccac163c770dc9, 0xbfcc355b67195dd0 +data8 0xbfcbbe540a3f036f, 0xbfcb6ecf175f95e9 +data8 0xbfcaf74751e1be33, 0xbfca7f71fb7bab9d +data8 0xbfca2f632320b86b, 0xbfc9b70ba539dfae +data8 0xbfc93e6587910444, 0xbfc8edcae8352b6c +data8 0xbfc874a0db01a719, 0xbfc7fb27199df16d +data8 0xbfc7a9fec7d05ddf, 0xbfc72fff456ac70d +data8 0xbfc6de7d66023dbc, 0xbfc663f6fac91316 +data8 0xbfc6121ac74813cf, 0xbfc5970c478fff4a +data8 0xbfc51bab907a5c8a, 0xbfc4c93d33151b24 +data8 0xbfc44d527fdadf55, 0xbfc3fa87be0f3a1b +data8 0xbfc3a797cd35d959, 0xbfc32ae9e278ae1a +data8 0xbfc2d79c6937efdd, 0xbfc25a619370d9dc +data8 0xbfc206b5bde2f8b8, 0xbfc188ecbd1d16be +data8 0xbfc134e1b489062e, 0xbfc0b6894488e95f +data8 0xbfc0621e2f556b5c, 0xbfc00d8c711a12cc +data8 0xbfbf1cd21257e18c, 0xbfbe72ec117fa5b2 +data8 0xbfbdc8b7c49a1ddb, 0xbfbcc8d5e467b710 +data8 0xbfbc1ddc9c39c7a1, 0xbfbb7294093cdd0f +data8 0xbfba7111df348494, 0xbfb9c501cdf75872 +data8 0xbfb918a16e46335b, 0xbfb81579a73e83c6 +data8 0xbfb7684f39f4ff2d, 0xbfb6bad3758efd87 +data8 0xbfb60d060d7e41ac, 0xbfb507b836033bb7 +data8 0xbfb4591d6310d85a, 0xbfb3aa2fdd27f1c3 +data8 0xbfb2faef55ccb372, 0xbfb1f3723b4ae6db +data8 0xbfb14360d6136ffa, 0xbfb092fb594145c1 +data8 0xbfafc482e8b48a7e, 0xbfae6265ace11ae4 +data8 0xbfacff9e5c4341d0, 0xbfaaea3316095f72 +data8 0xbfa985bfc3495194, 0xbfa820a01ac754cb +data8 0xbfa6bad3758efd87, 0xbfa554592bb8cd58 +data8 0xbfa3ed3094685a26, 0xbfa2855905ca70f6 +data8 0xbfa11cd1d5133413, 0xbf9dfd78881399f1 +data8 0xbf9b28f618cc85df, 0xbf98530faa3c087b +data8 0xbf957bc3dddcd7fa, 0xbf92a3115322f9e6 +data8 0xbf8f91ed4eef8370, 0xbf89dae4ec6b8b2e +data8 0xbf842106b1499209, 0xbf7cc89f97d67594 +data8 0xbf71497accf7e11d, 0x0000000000000000 +LOCAL_OBJECT_END(T_table) + + +.section .text +GLOBAL_LIBM_ENTRY(log2f) + +{ .mfi + alloc r32=ar.pfs,1,4,4,0 + // y=frcpa(x) + frcpa.s1 f6,p0=f1,f8 + // will form significand of 1.5 (to test whether the index is 128 or above) + mov r24=0xc +} +{.mfi + nop.m 0 + // normalize x + fma.s1 f7=f8,f1,f0 + // r2 = pointer to C_1...C_6 followed by T_table + addl r2 = @ltoff(poly_coeffs), gp;; +} +{.mfi + // get significand + getf.sig r25=f8 + // f8 denormal ? + fclass.m p8,p10=f8,0x9 + // will form significand of 1.5 (to test whether the index is 128 or above) + shl r24=r24,60 +} +{.mfi + mov r26=0x804 + nop.f 0 + // r23=bias-1 + mov r23=0xfffe;; +} + +{.mmf + getf.exp r29=f8 + // load start address for C_1...C_6 followed by T_table + ld8 r2=[r2] + // will continue only for positive normal/denormal numbers + fclass.nm.unc p12,p7 = f8, 0x19 ;; +} + +.pred.rel "mutex",p8,p10 +{.mfi + // denormal input, repeat get significand (after normalization) + (p8) getf.sig r25=f7 + // x=1 ? + fcmp.eq.s0 p6,p0=f8,f1 + // get T_index + (p10) shr.u r28=r25,63-8 +} +{.mfi + // f12=0.5 + setf.exp f12=r23 + nop.f 0 + // r27=bias + mov r27=0xffff;; +} + +{.mfb + // denormal input, repeat get exponent (after normalization) + (p8) getf.exp r29=f7 + nop.f 0 + (p12) br.cond.spnt SPECIAL_log2f +} +{.mfi + cmp.geu p12,p0=r25,r24 + nop.f 0 + mov r23=0xff;; +} + +{.mfi + add r3=32,r2 + // r=1-x*y + fms.s1 f6=f6,f8,f1 + // r26=0x80400...0 (threshold for using polynomial approximation) + shl r26=r26,64-12 +} +{.mfi + // load C_3, C_4 + ldfpd f10,f11=[r2],16 + nop.f 0 + // r27=bias-1 (if index >=128, will add exponent+1) + (p12) mov r27=0xfffe;; +} + +{.mfi + // load C_1 + ldfe f14=[r2],32 + // x=1, return 0 + (p6) fma.s.s0 f8=f0,f0,f0 + (p8) shr.u r28=r25,63-8 +} +{.mib + // load C_2 + ldfe f13=[r3] + // r29=exponent-bias + sub r29=r29,r27 + // x=1, return + (p6) br.ret.spnt b0;; +} + + +{.mfi + // get T_index + and r28=r28,r23 + fmerge.se f7=f1,f7 + // if first 9 bits after leading 1 are all zero, then p8=1 + cmp.ltu p8,p12=r25,r26;; +} +{.mfi + // f8=expon - bias + setf.sig f8=r29 + nop.f 0 + // get T address + shladd r2=r28,3,r2 +} +{.mfi + // first 8 bits after leading 1 are all ones ? + cmp.eq p10,p0=r23,r28 + // if first 8 bits after leading bit are 0, use polynomial approx. only + (p8) fms.s1 f6=f7,f1,f1 + nop.i 0;; +} +{.mfi + //r26=1 + mov r26=1 + // if first 8 bits after leading 1 are all ones, use polynomial approx. only + (p10) fms.s1 f6=f7,f12,f1 + nop.i 0;; +} + +.pred.rel "mutex",p8,p12 +{.mmf + // load T (unless first 9 bits after leading 1 are 0) + (p12) ldfd f12=[r2] + nop.m 0 + // set T=0 (if first 9 bits after leading 1 are 0) + (p8) fma.s1 f12=f0,f0,f0;; +} + +{.mfi + nop.m 0 + // P34=C_3+C_4*r + fma.s1 f10=f11,f6,f10 + // r26=2^{63} + shl r26=r26,63 +} +{.mfi + nop.m 0 + // r2=r*r + fma.s1 f11=f6,f6,f0 + nop.i 0;; +} +{.mfi + // significand of x is 1 ? + cmp.eq p0,p6=r25,r26 + // P12=C_1+C_2*r + fma.s1 f14=f13,f6,f14 + nop.i 0;; +} +{.mfi + nop.m 0 + // normalize additive term (l=exponent of x) + fcvt.xf f8=f8 + // if significand(x)=1, return exponent (l) + nop.i 0;; +} +{.mfi + nop.m 0 + // add T+l + (p6) fma.s1 f8=f8,f1,f12 + nop.i 0 +} +{.mfi + nop.m 0 + // P14=P12+r2*P34 + (p6) fma.s1 f13=f10,f11,f14 + nop.i 0;; +} + +{.mfb + nop.m 0 + // result=T+l+r*P14 + (p6) fma.s.s0 f8=f13,f6,f8 + // return + br.ret.sptk b0;; +} + + +SPECIAL_log2f: +{.mfi + nop.m 0 + // x=+Infinity ? + fclass.m p7,p0=f8,0x21 + nop.i 0;; +} +{.mfi + nop.m 0 + // x=+/-Zero ? + fclass.m p8,p0=f8,0x7 + nop.i 0;; +} +{.mfi + nop.m 0 + // x=-Infinity, -normal, -denormal ? + fclass.m p6,p0=f8,0x3a + nop.i 0;; +} +{.mfb + nop.m 0 + // log2f(+Infinity)=+Infinity + nop.f 0 + (p7) br.ret.spnt b0;; +} +{.mfi + (p8) mov GR_Parameter_TAG = 172 + // log2f(+/-0)=-infinity, raises Divide by Zero + // set f8=-0 + (p8) fmerge.ns f8=f0,f8 + nop.i 0;; +} +{.mfb + nop.m 0 + (p8) frcpa.s0 f8,p0=f1,f8 + (p8) br.cond.sptk __libm_error_region;; +} +{.mfb + (p6) mov GR_Parameter_TAG = 173 + // x<0: return NaN, raise Invalid + (p6) frcpa.s0 f8,p0=f0,f0 + (p6) br.cond.sptk __libm_error_region;; +} + + +{.mfb + nop.m 0 + // Remaining cases: NaNs + fma.s.s0 f8=f8,f1,f0 + br.ret.sptk b0;; +} + +GLOBAL_LIBM_END(log2f) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue +{ .mfi + add GR_Parameter_Y=-32,sp // Parameter 2 value + nop.f 0 +.save ar.pfs,GR_SAVE_PFS + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs +} +{ .mfi +.fframe 64 + add sp=-64,sp // Create new stack + nop.f 0 + mov GR_SAVE_GP=gp // Save gp +};; +{ .mmi + stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack + add GR_Parameter_X = 16,sp // Parameter 1 address +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 +};; +.body +{ .mib + stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address + nop.b 0 +} +{ .mib + stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack + add GR_Parameter_Y = -16,GR_Parameter_Y + br.call.sptk b0=__libm_error_support# // Call error handling function +};; +{ .mmi + nop.m 0 + nop.m 0 + add GR_Parameter_RESULT = 48,sp +};; +{ .mmi + ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack +.restore sp + add sp = 64,sp // Restore stack pointer + mov b0 = GR_SAVE_B0 // Restore return address +};; +{ .mib + mov gp = GR_SAVE_GP // Restore gp + mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs + br.ret.sptk b0 // Return +};; + +LOCAL_LIBM_END(__libm_error_region) +.type __libm_error_support#,@function +.global __libm_error_support# + + + + |