aboutsummaryrefslogtreecommitdiff
path: root/libgcc
diff options
context:
space:
mode:
authorDing-Kai Chen <dkchen@cadence.com>2016-10-18 19:06:33 +0000
committerMax Filippov <jcmvbkbc@gcc.gnu.org>2016-10-18 19:06:33 +0000
commit66192aa1294360c2522e2e30fc45f84a81719419 (patch)
treea2d0d16150c035c44a7b6c0b5c853224f80ee657 /libgcc
parent6dddab0845c9056db22ecb86c12564244fa0f911 (diff)
downloadgcc-66192aa1294360c2522e2e30fc45f84a81719419.zip
gcc-66192aa1294360c2522e2e30fc45f84a81719419.tar.gz
gcc-66192aa1294360c2522e2e30fc45f84a81719419.tar.bz2
xtensa: add HW FPU sequences for DIV/SQRT/RECIP/RSQRT
Use new FPU instruction sequences documented in the ISA book to implement __divsf3, __divdf3, __recipsf2, __recipdf2, __rsqrtsf2, __rsqrtdf2 and __ieee754_sqrtf and __ieee754_sqrt. 2016-10-18 Ding-Kai Chen <dkchen@cadence.com> libgcc/ * config/xtensa/ieee754-df.S (__recipdf2, __rsqrtdf2, __ieee754_sqrt): New functions. (__divdf3): Add implementation with new FPU instructions under #if XCHAL_HAVE_DFP_DIV. * config/xtensa/ieee754-sf.S (__recipsf2, __rsqrtsf2, __ieee754_sqrtf): New functions. (__divsf3): Add implementation with new FPU instructions under #if XCHAL_HAVE_FP_DIV. * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _sqrtf, _recipsf2 _rsqrtsf2, _sqrt, _recipdf2 and _rsqrtdf2. From-SVN: r241312
Diffstat (limited to 'libgcc')
-rw-r--r--libgcc/ChangeLog13
-rw-r--r--libgcc/config/xtensa/ieee754-df.S178
-rw-r--r--libgcc/config/xtensa/ieee754-sf.S156
-rw-r--r--libgcc/config/xtensa/t-xtensa4
4 files changed, 348 insertions, 3 deletions
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index c861ca6..b98f6d4 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,16 @@
+2016-10-18 Ding-Kai Chen <dkchen@cadence.com>
+
+ * config/xtensa/ieee754-df.S (__recipdf2, __rsqrtdf2,
+ __ieee754_sqrt): New functions.
+ (__divdf3): Add implementation with new FPU instructions under
+ #if XCHAL_HAVE_DFP_DIV.
+ * config/xtensa/ieee754-sf.S (__recipsf2, __rsqrtsf2,
+ __ieee754_sqrtf): New functions.
+ (__divsf3): Add implementation with new FPU instructions under
+ #if XCHAL_HAVE_FP_DIV.
+ * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _sqrtf, _recipsf2
+ _rsqrtsf2, _sqrt, _recipdf2 and _rsqrtdf2.
+
2016-10-13 Thomas Preud'homme <thomas.preudhomme@arm.com>
* libgcov-profiler.c: Replace MEMMODEL_* macros by their __ATOMIC_*
diff --git a/libgcc/config/xtensa/ieee754-df.S b/libgcc/config/xtensa/ieee754-df.S
index 1d9ef46..a2fb166 100644
--- a/libgcc/config/xtensa/ieee754-df.S
+++ b/libgcc/config/xtensa/ieee754-df.S
@@ -1217,8 +1217,58 @@ __muldf3:
#ifdef L_divdf3
- .literal_position
/* Division */
+
+#if XCHAL_HAVE_DFP_DIV
+
+ .text
+ .align 4
+ .global __divdf3
+ .type __divdf3, @function
+__divdf3:
+ leaf_entry sp, 16
+
+ wfrd f1, xh, xl
+ wfrd f2, yh, yl
+
+ div0.d f3, f2
+ nexp01.d f4, f2
+ const.d f0, 1
+ maddn.d f0, f4, f3
+ const.d f5, 0
+ mov.d f7, f2
+ mkdadj.d f7, f1
+ maddn.d f3, f0, f3
+ maddn.d f5, f0, f0
+ nexp01.d f1, f1
+ div0.d f2, f2
+ maddn.d f3, f5, f3
+ const.d f5, 1
+ const.d f0, 0
+ neg.d f6, f1
+ maddn.d f5, f4, f3
+ maddn.d f0, f6, f2
+ maddn.d f3, f5, f3
+ maddn.d f6, f4, f0
+ const.d f2, 1
+ maddn.d f2, f4, f3
+ maddn.d f0, f6, f3
+ neg.d f1, f1
+ maddn.d f3, f2, f3
+ maddn.d f1, f4, f0
+ addexpm.d f0, f7
+ addexp.d f3, f7
+ divn.d f0, f1, f3
+
+ rfr xl, f0
+ rfrd xh, f0
+
+ leaf_return
+
+#else
+
+ .literal_position
+
__divdf3_aux:
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
@@ -1537,6 +1587,8 @@ __divdf3:
movi xl, 0
leaf_return
+#endif /* XCHAL_HAVE_DFP_DIV */
+
#endif /* L_divdf3 */
#ifdef L_cmpdf2
@@ -2388,3 +2440,127 @@ __extendsfdf2:
#endif /* L_extendsfdf2 */
+#if XCHAL_HAVE_DFP_SQRT
+#ifdef L_sqrt
+
+ .text
+ .align 4
+ .global __ieee754_sqrt
+ .type __ieee754_sqrt, @function
+__ieee754_sqrt:
+ leaf_entry sp, 16
+
+ wfrd f1, xh, xl
+
+ sqrt0.d f2, f1
+ const.d f4, 0
+ maddn.d f4, f2, f2
+ nexp01.d f3, f1
+ const.d f0, 3
+ addexp.d f3, f0
+ maddn.d f0, f4, f3
+ nexp01.d f4, f1
+ maddn.d f2, f0, f2
+ const.d f5, 0
+ maddn.d f5, f2, f3
+ const.d f0, 3
+ maddn.d f0, f5, f2
+ neg.d f6, f4
+ maddn.d f2, f0, f2
+ const.d f0, 0
+ const.d f5, 0
+ const.d f7, 0
+ maddn.d f0, f6, f2
+ maddn.d f5, f2, f3
+ const.d f3, 3
+ maddn.d f7, f3, f2
+ maddn.d f4, f0, f0
+ maddn.d f3, f5, f2
+ neg.d f2, f7
+ maddn.d f0, f4, f2
+ maddn.d f7, f3, f7
+ mksadj.d f2, f1
+ nexp01.d f1, f1
+ maddn.d f1, f0, f0
+ neg.d f3, f7
+ addexpm.d f0, f2
+ addexp.d f3, f2
+ divn.d f0, f1, f3
+
+ rfr xl, f0
+ rfrd xh, f0
+
+ leaf_return
+
+#endif /* L_sqrt */
+#endif /* XCHAL_HAVE_DFP_SQRT */
+
+#if XCHAL_HAVE_DFP_RECIP
+#ifdef L_recipdf2
+ /* Reciprocal */
+
+ .align 4
+ .global __recipdf2
+ .type __recipdf2, @function
+__recipdf2:
+ leaf_entry sp, 16
+
+ wfrd f1, xh, xl
+
+ recip0.d f0, f1
+ const.d f2, 2
+ msub.d f2, f1, f0
+ mul.d f3, f1, f0
+ const.d f4, 2
+ mul.d f5, f0, f2
+ msub.d f4, f3, f2
+ const.d f2, 1
+ mul.d f0, f5, f4
+ msub.d f2, f1, f0
+ maddn.d f0, f0, f2
+
+ rfr xl, f0
+ rfrd xh, f0
+
+ leaf_return
+
+#endif /* L_recipdf2 */
+#endif /* XCHAL_HAVE_DFP_RECIP */
+
+#if XCHAL_HAVE_DFP_RSQRT
+#ifdef L_rsqrtdf2
+ /* Reciprocal square root */
+
+ .align 4
+ .global __rsqrtdf2
+ .type __rsqrtdf2, @function
+__rsqrtdf2:
+ leaf_entry sp, 16
+
+ wfrd f1, xh, xl
+
+ rsqrt0.d f0, f1
+ mul.d f2, f1, f0
+ const.d f3, 3
+ mul.d f4, f3, f0
+ const.d f5, 1
+ msub.d f5, f2, f0
+ maddn.d f0, f4, f5
+ const.d f2, 1
+ mul.d f4, f1, f0
+ mul.d f5, f3, f0
+ msub.d f2, f4, f0
+ maddn.d f0, f5, f2
+ const.d f2, 1
+ mul.d f1, f1, f0
+ mul.d f3, f3, f0
+ msub.d f2, f1, f0
+ maddn.d f0, f3, f2
+
+ rfr xl, f0
+ rfrd xh, f0
+
+ leaf_return
+
+#endif /* L_rsqrtdf2 */
+#endif /* XCHAL_HAVE_DFP_RSQRT */
diff --git a/libgcc/config/xtensa/ieee754-sf.S b/libgcc/config/xtensa/ieee754-sf.S
index 7864a74..6acad59 100644
--- a/libgcc/config/xtensa/ieee754-sf.S
+++ b/libgcc/config/xtensa/ieee754-sf.S
@@ -885,8 +885,52 @@ __mulsf3:
#ifdef L_divsf3
- .literal_position
/* Division */
+
+#if XCHAL_HAVE_FP_DIV
+
+ .align 4
+ .global __divsf3
+ .type __divsf3, @function
+__divsf3:
+ leaf_entry sp, 16
+
+ wfr f1, a2 /* dividend */
+ wfr f2, a3 /* divisor */
+
+ div0.s f3, f2
+ nexp01.s f4, f2
+ const.s f5, 1
+ maddn.s f5, f4, f3
+ mov.s f6, f3
+ mov.s f7, f2
+ nexp01.s f2, f1
+ maddn.s f6, f5, f6
+ const.s f5, 1
+ const.s f0, 0
+ neg.s f8, f2
+ maddn.s f5, f4, f6
+ maddn.s f0, f8, f3
+ mkdadj.s f7, f1
+ maddn.s f6, f5, f6
+ maddn.s f8, f4, f0
+ const.s f3, 1
+ maddn.s f3, f4, f6
+ maddn.s f0, f8, f6
+ neg.s f2, f2
+ maddn.s f6, f3, f6
+ maddn.s f2, f4, f0
+ addexpm.s f0, f7
+ addexp.s f6, f7
+ divn.s f0, f2, f6
+
+ rfr a2, f0
+
+ leaf_return
+
+#else
+
+ .literal_position
__divsf3_aux:
/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
@@ -1112,6 +1156,8 @@ __divsf3:
slli a2, a2, 31
leaf_return
+#endif /* XCHAL_HAVE_FP_DIV */
+
#endif /* L_divsf3 */
#ifdef L_cmpsf2
@@ -1757,3 +1803,111 @@ __floatdisf:
leaf_return
#endif /* L_floatdisf */
+
+#if XCHAL_HAVE_FP_SQRT
+#ifdef L_sqrtf
+ /* Square root */
+
+ .align 4
+ .global __ieee754_sqrtf
+ .type __ieee754_sqrtf, @function
+__ieee754_sqrtf:
+ leaf_entry sp, 16
+
+ wfr f1, a2
+
+ sqrt0.s f2, f1
+ const.s f3, 0
+ maddn.s f3, f2, f2
+ nexp01.s f4, f1
+ const.s f0, 3
+ addexp.s f4, f0
+ maddn.s f0, f3, f4
+ nexp01.s f3, f1
+ neg.s f5, f3
+ maddn.s f2, f0, f2
+ const.s f0, 0
+ const.s f6, 0
+ const.s f7, 0
+ maddn.s f0, f5, f2
+ maddn.s f6, f2, f4
+ const.s f4, 3
+ maddn.s f7, f4, f2
+ maddn.s f3, f0, f0
+ maddn.s f4, f6, f2
+ neg.s f2, f7
+ maddn.s f0, f3, f2
+ maddn.s f7, f4, f7
+ mksadj.s f2, f1
+ nexp01.s f1, f1
+ maddn.s f1, f0, f0
+ neg.s f3, f7
+ addexpm.s f0, f2
+ addexp.s f3, f2
+ divn.s f0, f1, f3
+
+ rfr a2, f0
+
+ leaf_return
+
+#endif /* L_sqrtf */
+#endif /* XCHAL_HAVE_FP_SQRT */
+
+#if XCHAL_HAVE_FP_RECIP
+#ifdef L_recipsf2
+ /* Reciprocal */
+
+ .align 4
+ .global __recipsf2
+ .type __recipsf2, @function
+__recipsf2:
+ leaf_entry sp, 16
+
+ wfr f1, a2
+
+ recip0.s f0, f1
+ const.s f2, 1
+ msub.s f2, f1, f0
+ maddn.s f0, f0, f2
+ const.s f2, 1
+ msub.s f2, f1, f0
+ maddn.s f0, f0, f2
+
+ rfr a2, f0
+
+ leaf_return
+
+#endif /* L_recipsf2 */
+#endif /* XCHAL_HAVE_FP_RECIP */
+
+#if XCHAL_HAVE_FP_RSQRT
+#ifdef L_rsqrtsf2
+ /* Reciprocal square root */
+
+ .align 4
+ .global __rsqrtsf2
+ .type __rsqrtsf2, @function
+__rsqrtsf2:
+ leaf_entry sp, 16
+
+ wfr f1, a2
+
+ rsqrt0.s f0, f1
+ mul.s f2, f1, f0
+ const.s f3, 3;
+ mul.s f4, f3, f0
+ const.s f5, 1
+ msub.s f5, f2, f0
+ maddn.s f0, f4, f5
+ mul.s f2, f1, f0
+ mul.s f1, f3, f0
+ const.s f3, 1
+ msub.s f3, f2, f0
+ maddn.s f0, f1, f3
+
+ rfr a2, f0
+
+ leaf_return
+
+#endif /* L_rsqrtsf2 */
+#endif /* XCHAL_HAVE_FP_RSQRT */
diff --git a/libgcc/config/xtensa/t-xtensa b/libgcc/config/xtensa/t-xtensa
index ed3eb84..90df5f1 100644
--- a/libgcc/config/xtensa/t-xtensa
+++ b/libgcc/config/xtensa/t-xtensa
@@ -4,10 +4,12 @@ LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
_ashldi3 _ashrdi3 _lshrdi3 \
_negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
_fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
+ _sqrtf _recipsf2 _rsqrtsf2 \
_floatdisf _floatundisf \
_negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
_fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
_floatdidf _floatundidf \
- _truncdfsf2 _extendsfdf2
+ _truncdfsf2 _extendsfdf2 \
+ _sqrt _recipdf2 _rsqrtdf2
LIB2ADD = $(srcdir)/config/xtensa/lib2funcs.S