diff options
author | Hans-Peter Nilsson <hp@axis.com> | 2013-10-16 01:43:14 +0000 |
---|---|---|
committer | Hans-Peter Nilsson <hp@gcc.gnu.org> | 2013-10-16 01:43:14 +0000 |
commit | 0e499e759cbc393de610e7023c483d0ca0a8a7b0 (patch) | |
tree | c5d3a341da560ba98e56324e917e4ecf6c089531 /libgcc/config/cris/mulsi3.S | |
parent | b82d0df95b6592adfb3f8c4872fdad41c513e7c5 (diff) | |
download | gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.zip gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.gz gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.bz2 |
For CRIS ports, switch to soft-fp. Improve arit.c and longlong.h.
* config.host (cpu_type) <Setting default>: Add entry for
crisv32-*-*.
(tmake_file) <crisv32-*-elf, cris-*-elf, cris-*-linux*>
<crisv32-*-linux*>: Adjust.
* longlong.h: Wrap the whole CRIS section in a single
defined(__CRIS__) conditional. Add comment about add_ssaaaa
and sub_ddmmss.
(COUNT_LEADING_ZEROS_0): Define when count_leading_zeros is
defined.
[__CRIS__] (__umulsidi3): Define.
[__CRIS__] (umul_ppmm): Define in terms of __umulsidi3.
* config/cris/sfp-machine.h: New file.
* config/cris/umulsidi3.S: New file.
* config/cris/t-elfmulti (LIB2ADD_ST): Add umulsidi3.S.
* config/cris/arit.c (SIGNMULT): New macro.
(__Div, __Mod): Use SIGNMULT instead of naked multiplication.
* config/cris/mulsi3.S: Tweak to avoid redundant register-copying;
saving 3 out of originally 33 cycles from the fastest
path, 3 out of 54 from the medium path and one from the longest
path. Improve comments.
From-SVN: r203640
Diffstat (limited to 'libgcc/config/cris/mulsi3.S')
-rw-r--r-- | libgcc/config/cris/mulsi3.S | 78 |
1 files changed, 46 insertions, 32 deletions
```diff
diff --git a/libgcc/config/cris/mulsi3.S b/libgcc/config/cris/mulsi3.S
index 8ff76e5..734e162 100644
--- a/libgcc/config/cris/mulsi3.S
+++ b/libgcc/config/cris/mulsi3.S
@@ -113,16 +113,22 @@ ___Mul:
 	ret
 	nop
 #else
-	move.d $r10,$r12
+;; See if we can avoid multiplying some of the parts, knowing
+;; they're zero.
+
 	move.d $r11,$r9
-	bound.d $r12,$r9
+	bound.d $r10,$r9
 	cmpu.w 65535,$r9
 	bls L(L3)
-	move.d $r12,$r13
+	move.d $r10,$r12
 
-	movu.w $r11,$r9
+;; Nope, have to do all the parts of a 32-bit multiplication.
+;; See head comment in optabs.c:expand_doubleword_mult.
+
+	move.d $r10,$r13
+	movu.w $r11,$r9 ; ab*cd = (a*d + b*c)<<16 + b*d
 	lslq 16,$r13
-	mstep $r9,$r13
+	mstep $r9,$r13 ; d*b
 	mstep $r9,$r13
 	mstep $r9,$r13
 	mstep $r9,$r13
@@ -140,7 +146,7 @@ ___Mul:
 	mstep $r9,$r13
 	clear.w $r10
 	test.d $r10
-	mstep $r9,$r10
+	mstep $r9,$r10 ; d*a
 	mstep $r9,$r10
 	mstep $r9,$r10
 	mstep $r9,$r10
@@ -157,10 +163,9 @@ ___Mul:
 	mstep $r9,$r10
 	mstep $r9,$r10
 	movu.w $r12,$r12
-	move.d $r11,$r9
-	clear.w $r9
-	test.d $r9
-	mstep $r12,$r9
+	clear.w $r11
+	move.d $r11,$r9 ; Doubles as a "test.d" preparing for the mstep.
+	mstep $r12,$r9 ; b*c
 	mstep $r12,$r9
 	mstep $r12,$r9
 	mstep $r12,$r9
@@ -182,17 +187,24 @@ ___Mul:
 	add.d $r13,$r10
 
 L(L3):
-	move.d $r9,$r10
+;; Form the maximum in $r10, by knowing the minimum, $r9.
+;; (We don't know which one of $r10 or $r11 it is.)
+;; Check if the largest operand is still just 16 bits.
+
+	xor $r9,$r10
 	xor $r11,$r10
-	xor $r12,$r10
 	cmpu.w 65535,$r10
 	bls L(L5)
 	movu.w $r9,$r13
 
-	movu.w $r13,$r13
+;; We have ab*cd = (a*c)<<32 + (a*d + b*c)<<16 + b*d, but c==0
+;; so we only need (a*d)<<16 + b*d with d = $r13, ab = $r10.
+;; We drop the upper part of (a*d)<<16 as we're only doing a
+;; 32-bit-result multiplication.
+
 	move.d $r10,$r9
 	lslq 16,$r9
-	mstep $r13,$r9
+	mstep $r13,$r9 ; b*d
 	mstep $r13,$r9
 	mstep $r13,$r9
 	mstep $r13,$r9
@@ -210,7 +222,7 @@ L(L3):
 	mstep $r13,$r9
 	clear.w $r10
 	test.d $r10
-	mstep $r13,$r10
+	mstep $r13,$r10 ; a*d
 	mstep $r13,$r10
 	mstep $r13,$r10
 	mstep $r13,$r10
@@ -231,25 +243,27 @@ L(L3):
 	add.d $r9,$r10
 
 L(L5):
-	movu.w $r9,$r9
+;; We have ab*cd = (a*c)<<32 + (a*d + b*c)<<16 + b*d, but a and c==0
+;; so b*d (with b=$r13, a=$r10) it is.
+
 	lslq 16,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
 	ret
-	mstep $r9,$r10
+	mstep $r13,$r10
 #endif
 L(Lfe1):
 	.size ___Mul,L(Lfe1)-___Mul
```