softfloat: add reciprocal api

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
author: Chih-Min Chao <chihmin.chao@sifive.com> 2020-08-25 21:24:25 -0700
committer: Chih-Min Chao <chihmin.chao@sifive.com> 2020-08-31 00:44:35 -0700
commit: eceda60356a435dfac2c5573badf0144f4c7f146 (patch)
tree: c9fb5d98c196f05c488103860227a58537f496ba
parent: e11db4e6c978eb20a2be2bb253755703231971e3 (diff)
download: spike-eceda60356a435dfac2c5573badf0144f4c7f146.zip
spike-eceda60356a435dfac2c5573badf0144f4c7f146.tar.gz
spike-eceda60356a435dfac2c5573badf0144f4c7f146.tar.bz2
3 files changed, 399 insertions, 0 deletions
diff --git a/softfloat/fall_reciprocal.c b/softfloat/fall_reciprocal.c
new file mode 100644
index 0000000..1c96458
--- /dev/null
+++ b/softfloat/fall_reciprocal.c
@@ -0,0 +1,392 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+static inline uint64_t extract64(uint64_t val, int pos, int len)
+{
+  assert(pos >= 0 && len > 0 && len <= 64 - pos);
+  return (val >> pos) & (~UINT64_C(0) >> (64 - len));
+}
+
+static inline uint64_t make_mask64(int pos, int len)
+{
+    assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
+    return (UINT64_MAX >> (64 - len)) << pos;
+}
+
+//user needs to truncate output to required length
+static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
+  uint64_t exp = extract64(val, s, e);
+  uint64_t sig = extract64(val, 0, s);
+  uint64_t sign = extract64(val, s + e, 1);
+  const int p = 7;
+
+  static const uint8_t table[] = {
+      52, 51, 50, 48, 47, 46, 44, 43,
+      42, 41, 40, 39, 38, 36, 35, 34,
+      33, 32, 31, 30, 30, 29, 28, 27,
+      26, 25, 24, 23, 23, 22, 21, 20,
+      19, 19, 18, 17, 16, 16, 15, 14,
+      14, 13, 12, 12, 11, 10, 10, 9,
+      9, 8, 7, 7, 6, 6, 5, 4,
+      4, 3, 3, 2, 2, 1, 1, 0,
+      127, 125, 123, 121, 119, 118, 116, 114,
+      113, 111, 109, 108, 106, 105, 103, 102,
+      100, 99, 97, 96, 95, 93, 92, 91,
+      90, 88, 87, 86, 85, 84, 83, 82,
+      80, 79, 78, 77, 76, 75, 74, 73,
+      72, 71, 70, 70, 69, 68, 67, 66,
+      65, 64, 63, 63, 62, 61, 60, 59,
+      59, 58, 57, 56, 56, 55, 54, 53};
+
+  if (sub) {
+      while (extract64(sig, s - 1, 1) == 0)
+          exp--, sig <<= 1;
+
+      sig = (sig << 1) & make_mask64(0 ,s);
+  }
+
+  int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1));
+  uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
+  uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;
+
+  return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+
+float16_t f16_rsqrte7(float16_t in)
+{
+    union ui16_f16 uA;
+
+    uA.f = in;
+    unsigned int ret = f16_classify(in);
+    bool sub = false;
+    switch(ret) {
+    case 0x001: // -inf
+    case 0x002: // -normal
+    case 0x004: // -subnormal
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF16UI;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfc00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7c00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x020: //+ sub
+        sub = true;
+    default: // +num
+        uA.ui = rsqrte7(uA.ui, 5, 10, sub);
+        break;
+    }
+
+    return uA.f;
+}
+
+float32_t f32_rsqrte7(float32_t in)
+{
+    union ui32_f32 uA;
+
+    uA.f = in;
+    unsigned int ret = f32_classify(in);
+    bool sub = false;
+    switch(ret) {
+    case 0x001: // -inf
+    case 0x002: // -normal
+    case 0x004: // -subnormal
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF32UI;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xff800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7f800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x020: //+ sub
+        sub = true;
+    default: // +num
+        uA.ui = rsqrte7(uA.ui, 8, 23, sub);
+        break;
+    }
+
+    return uA.f;
+}
+
+float64_t f64_rsqrte7(float64_t in)
+{
+    union ui64_f64 uA;
+
+    uA.f = in;
+    unsigned int ret = f64_classify(in);
+    bool sub = false;
+    switch(ret) {
+    case 0x001: // -inf
+    case 0x002: // -normal
+    case 0x004: // -subnormal
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF64UI;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfff0000000000000ul;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7ff0000000000000ul;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x020: //+ sub
+        sub = true;
+    default: // +num
+        uA.ui = rsqrte7(uA.ui, 11, 52, sub);
+        break;
+    }
+
+    return uA.f;
+}
+
+//user needs to truncate output to required length
+static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
+                              bool *round_abnormal)
+{
+    uint64_t exp = extract64(val, s, e);
+    uint64_t sig = extract64(val, 0, s);
+    uint64_t sign = extract64(val, s + e, 1);
+    const int p = 7;
+
+    static const uint8_t table[] = {
+        127, 125, 123, 121, 119, 117, 116, 114,
+        112, 110, 109, 107, 105, 104, 102, 100,
+        99, 97, 96, 94, 93, 91, 90, 88,
+        87, 85, 84, 83, 81, 80, 79, 77,
+        76, 75, 74, 72, 71, 70, 69, 68,
+        66, 65, 64, 63, 62, 61, 60, 59,
+        58, 57, 56, 55, 54, 53, 52, 51,
+        50, 49, 48, 47, 46, 45, 44, 43,
+        42, 41, 40, 40, 39, 38, 37, 36,
+        35, 35, 34, 33, 32, 31, 31, 30,
+        29, 28, 28, 27, 26, 25, 25, 24,
+        23, 23, 22, 21, 21, 20, 19, 19,
+        18, 17, 17, 16, 15, 15, 14, 14,
+        13, 12, 12, 11, 11, 10, 9, 9,
+        8, 8, 7, 7, 6, 5, 5, 4,
+        4, 3, 3, 2, 2, 1, 1, 0};
+
+    if (sub) {
+        while (extract64(sig, s - 1, 1) == 0)
+            exp--, sig <<= 1;
+
+        sig = (sig << 1) & make_mask64(0 ,s);
+
+        if (exp != 0 && exp != UINT64_MAX) {
+            *round_abnormal = true;
+            if (rm == 1 ||
+                (rm == 2 && !sign) ||
+                (rm == 3 && sign))
+                return ((sign << (s+e)) | make_mask64(s, e)) - 1;
+            else
+                return (sign << (s+e)) | make_mask64(s, e);
+        }
+    }
+
+    int idx = sig >> (s-p);
+    uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
+    uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
+    if (out_exp == 0 || out_exp == UINT64_MAX) {
+        out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
+        if (out_exp == UINT64_MAX) {
+            out_sig >>= 1;
+            out_exp = 0;
+        }
+    }
+
+    return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+
+float16_t f16_recip7(float16_t in)
+{
+    union ui16_f16 uA;
+
+    uA.f = in;
+    unsigned int ret = f16_classify(in);
+    bool sub = false;
+    bool round_abnormal = false;
+    switch(ret) {
+    case 0x001: // -inf
+        uA.ui = 0x8000;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfc00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7c00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF16UI;
+        break;
+    case 0x004: // -subnormal
+    case 0x020: //+ sub
+        sub = true;
+    default: // +- normal
+        uA.ui = recip7(uA.ui, 5, 10,
+                       softfloat_roundingMode, sub, &round_abnormal);
+        if (round_abnormal)
+            softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                        softfloat_flag_overflow;
+        break;
+    }
+
+    return uA.f;
+}
+
+float32_t f32_recip7(float32_t in)
+{
+    union ui32_f32 uA;
+
+    uA.f = in;
+    unsigned int ret = f32_classify(in);
+    bool sub = false;
+    bool round_abnormal = false;
+    switch(ret) {
+    case 0x001: // -inf
+        uA.ui = 0x80000000;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xff800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7f800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF32UI;
+        break;
+    case 0x004: // -subnormal
+    case 0x020: //+ sub
+        sub = true;
+    default: // +- normal
+        uA.ui = recip7(uA.ui, 8, 23,
+                       softfloat_roundingMode, sub, &round_abnormal);
+        if (round_abnormal)
+          softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                      softfloat_flag_overflow;
+        break;
+    }
+
+    return uA.f;
+}
+
+float64_t f64_recip7(float64_t in)
+{
+    union ui64_f64 uA;
+
+    uA.f = in;
+    unsigned int ret = f64_classify(in);
+    bool sub = false;
+    bool round_abnormal = false;
+    switch(ret) {
+    case 0x001: // -inf
+        uA.ui = 0x8000000000000000;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfff0000000000000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7ff0000000000000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF64UI;
+        break;
+    case 0x004: // -subnormal
+    case 0x020: //+ sub
+        sub = true;
+    default: // +- normal
+        uA.ui = recip7(uA.ui, 11, 52,
+                       softfloat_roundingMode, sub, &round_abnormal);
+        if (round_abnormal)
+            softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                        softfloat_flag_overflow;
+        break;
+    }
+
+    return uA.f;
+}
diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h
index f63b6fc..bdac1be 100644
--- a/softfloat/softfloat.h
+++ b/softfloat/softfloat.h
@@ -179,6 +179,8 @@ bool f16_le_quiet( float16_t, float16_t );
 bool f16_lt_quiet( float16_t, float16_t );
 bool f16_isSignalingNaN( float16_t );
 uint_fast16_t f16_classify( float16_t );
+float16_t f16_rsqrte7( float16_t );
+float16_t f16_recip7( float16_t );
 
 /*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
@@ -219,6 +221,8 @@ bool f32_le_quiet( float32_t, float32_t );
 bool f32_lt_quiet( float32_t, float32_t );
 bool f32_isSignalingNaN( float32_t );
 uint_fast16_t f32_classify( float32_t );
+float32_t f32_rsqrte7( float32_t );
+float32_t f32_recip7( float32_t );
 
 /*----------------------------------------------------------------------------
 | 64-bit (double-precision) floating-point operations.
@@ -257,6 +261,8 @@ bool f64_le_quiet( float64_t, float64_t );
 bool f64_lt_quiet( float64_t, float64_t );
 bool f64_isSignalingNaN( float64_t );
 uint_fast16_t f64_classify( float64_t );
+float64_t f64_rsqrte7( float64_t );
+float64_t f64_recip7( float64_t );
 
 /*----------------------------------------------------------------------------
 | Rounding precision for 80-bit extended double-precision floating-point.
diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in
index 900771e..07dca16 100644
--- a/softfloat/softfloat.mk.in
+++ b/softfloat/softfloat.mk.in
@@ -125,6 +125,7 @@ softfloat_c_srcs = \
 	f64_to_ui64.c \
 	f64_to_ui64_r_minMag.c \
 	fall_maxmin.c \
+	fall_reciprocal.c \
 	i32_to_f128.c \
 	i32_to_f16.c \
 	i32_to_f32.c \
author	Chih-Min Chao <chihmin.chao@sifive.com>	2020-08-25 21:24:25 -0700
committer	Chih-Min Chao <chihmin.chao@sifive.com>	2020-08-31 00:44:35 -0700
commit	eceda60356a435dfac2c5573badf0144f4c7f146 (patch)
tree	c9fb5d98c196f05c488103860227a58537f496ba
parent	e11db4e6c978eb20a2be2bb253755703231971e3 (diff)
download	spike-eceda60356a435dfac2c5573badf0144f4c7f146.zip spike-eceda60356a435dfac2c5573badf0144f4c7f146.tar.gz spike-eceda60356a435dfac2c5573badf0144f4c7f146.tar.bz2