aboutsummaryrefslogtreecommitdiff
path: root/libgcc/config/cris/mulsi3.S
diff options
context:
space:
mode:
author: Hans-Peter Nilsson <hp@axis.com> 2013-10-16 01:43:14 +0000
committer: Hans-Peter Nilsson <hp@gcc.gnu.org> 2013-10-16 01:43:14 +0000
commit: 0e499e759cbc393de610e7023c483d0ca0a8a7b0 (patch)
tree: c5d3a341da560ba98e56324e917e4ecf6c089531 /libgcc/config/cris/mulsi3.S
parent: b82d0df95b6592adfb3f8c4872fdad41c513e7c5 (diff)
download: gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.zip
gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.gz
gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.bz2
For CRIS ports, switch to soft-fp. Improve arit.c and longlong.h.
* config.host (cpu_type) <Setting default>: Add entry for crisv32-*-*.
(tmake_file) <crisv32-*-elf, cris-*-elf, cris-*-linux*>
<crisv32-*-linux*>: Adjust.
* longlong.h: Wrap the whole CRIS section in a single
defined(__CRIS__) conditional. Add comment about add_ssaaaa and
sub_ddmmss.
(COUNT_LEADING_ZEROS_0): Define when count_leading_zeros is defined.
[__CRIS__] (__umulsidi3): Define.
[__CRIS__] (umul_ppmm): Define in terms of __umulsidi3.
* config/cris/sfp-machine.h: New file.
* config/cris/umulsidi3.S: New file.
* config/cris/t-elfmulti (LIB2ADD_ST): Add umulsidi3.S.
* config/cris/arit.c (SIGNMULT): New macro.
(__Div, __Mod): Use SIGNMULT instead of naked multiplication.
* config/cris/mulsi3.S: Tweak to avoid redundant register-copying;
saving 3 out of originally 33 cycles from the fastest path, 3 out of
54 from the medium path and one from the longest path. Improve
comments.

From-SVN: r203640
Diffstat (limited to 'libgcc/config/cris/mulsi3.S')
-rw-r--r-- libgcc/config/cris/mulsi3.S 78
1 file changed, 46 insertions, 32 deletions
diff --git a/libgcc/config/cris/mulsi3.S b/libgcc/config/cris/mulsi3.S
index 8ff76e5..734e162 100644
--- a/libgcc/config/cris/mulsi3.S
+++ b/libgcc/config/cris/mulsi3.S
@@ -113,16 +113,22 @@ ___Mul:
ret
nop
#else
- move.d $r10,$r12
+;; See if we can avoid multiplying some of the parts, knowing
+;; they're zero.
+
move.d $r11,$r9
- bound.d $r12,$r9
+ bound.d $r10,$r9
cmpu.w 65535,$r9
bls L(L3)
- move.d $r12,$r13
+ move.d $r10,$r12
- movu.w $r11,$r9
+;; Nope, have to do all the parts of a 32-bit multiplication.
+;; See head comment in optabs.c:expand_doubleword_mult.
+
+ move.d $r10,$r13
+ movu.w $r11,$r9 ; ab*cd = (a*d + b*c)<<16 + b*d
lslq 16,$r13
- mstep $r9,$r13
+ mstep $r9,$r13 ; d*b
mstep $r9,$r13
mstep $r9,$r13
mstep $r9,$r13
@@ -140,7 +146,7 @@ ___Mul:
mstep $r9,$r13
clear.w $r10
test.d $r10
- mstep $r9,$r10
+ mstep $r9,$r10 ; d*a
mstep $r9,$r10
mstep $r9,$r10
mstep $r9,$r10
@@ -157,10 +163,9 @@ ___Mul:
mstep $r9,$r10
mstep $r9,$r10
movu.w $r12,$r12
- move.d $r11,$r9
- clear.w $r9
- test.d $r9
- mstep $r12,$r9
+ clear.w $r11
+ move.d $r11,$r9 ; Doubles as a "test.d" preparing for the mstep.
+ mstep $r12,$r9 ; b*c
mstep $r12,$r9
mstep $r12,$r9
mstep $r12,$r9
@@ -182,17 +187,24 @@ ___Mul:
add.d $r13,$r10
L(L3):
- move.d $r9,$r10
+;; Form the maximum in $r10, by knowing the minimum, $r9.
+;; (We don't know which one of $r10 or $r11 it is.)
+;; Check if the largest operand is still just 16 bits.
+
+ xor $r9,$r10
xor $r11,$r10
- xor $r12,$r10
cmpu.w 65535,$r10
bls L(L5)
movu.w $r9,$r13
- movu.w $r13,$r13
+;; We have ab*cd = (a*c)<<32 + (a*d + b*c)<<16 + b*d, but c==0
+;; so we only need (a*d)<<16 + b*d with d = $r13, ab = $r10.
+;; We drop the upper part of (a*d)<<16 as we're only doing a
+;; 32-bit-result multiplication.
+
move.d $r10,$r9
lslq 16,$r9
- mstep $r13,$r9
+ mstep $r13,$r9 ; b*d
mstep $r13,$r9
mstep $r13,$r9
mstep $r13,$r9
@@ -210,7 +222,7 @@ L(L3):
mstep $r13,$r9
clear.w $r10
test.d $r10
- mstep $r13,$r10
+ mstep $r13,$r10 ; a*d
mstep $r13,$r10
mstep $r13,$r10
mstep $r13,$r10
@@ -231,25 +243,27 @@ L(L3):
add.d $r9,$r10
L(L5):
- movu.w $r9,$r9
+;; We have ab*cd = (a*c)<<32 + (a*d + b*c)<<16 + b*d, but a and c==0
+;; so b*d (with b=$r10, d=$r13) it is.
+
lslq 16,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
- mstep $r9,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
+ mstep $r13,$r10
ret
- mstep $r9,$r10
+ mstep $r13,$r10
#endif
L(Lfe1):
.size ___Mul,L(Lfe1)-___Mul