For CRIS ports, switch to soft-fp. Improve arit.c and longlong.h.

* config.host (cpu_type) <Setting default>: Add entry for crisv32-*-*.
(tmake_file) <crisv32-*-elf, cris-*-elf, cris-*-linux*> <crisv32-*-linux*>: Adjust.
* longlong.h: Wrap the whole CRIS section in a single defined(__CRIS__) conditional. Add comment about add_ssaaaa and sub_ddmmss.
(COUNT_LEADING_ZEROS_0): Define when count_leading_zeros is defined.
[__CRIS__] (__umulsidi3): Define.
[__CRIS__] (umul_ppmm): Define in terms of __umulsidi3.
* config/cris/sfp-machine.h: New file.
* config/cris/umulsidi3.S: New file.
* config/cris/t-elfmulti (LIB2ADD_ST): Add umulsidi3.S.
* config/cris/arit.c (SIGNMULT): New macro.
(__Div, __Mod): Use SIGNMULT instead of naked multiplication.
* config/cris/mulsi3.S: Tweak to avoid redundant register-copying; saving 3 out of originally 33 cycles from the fastest path, 3 out of 54 from the medium path and one from the longest path. Improve comments.

From-SVN: r203640
author: Hans-Peter Nilsson <hp@axis.com> 2013-10-16 01:43:14 +0000
committer: Hans-Peter Nilsson <hp@gcc.gnu.org> 2013-10-16 01:43:14 +0000
commit: 0e499e759cbc393de610e7023c483d0ca0a8a7b0 (patch)
tree: c5d3a341da560ba98e56324e917e4ecf6c089531 /libgcc/config/cris/mulsi3.S
parent: b82d0df95b6592adfb3f8c4872fdad41c513e7c5 (diff)
download: gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.zip
gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.gz
gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.bz2
1 files changed, 46 insertions, 32 deletions
diff --git a/libgcc/config/cris/mulsi3.S b/libgcc/config/cris/mulsi3.S
index 8ff76e5..734e162 100644
--- a/libgcc/config/cris/mulsi3.S
+++ b/libgcc/config/cris/mulsi3.S
@@ -113,16 +113,22 @@ ___Mul:
 	ret
 	nop
 #else
-	move.d $r10,$r12
+;; See if we can avoid multiplying some of the parts, knowing
+;; they're zero.
+
 	move.d $r11,$r9
-	bound.d $r12,$r9
+	bound.d $r10,$r9
 	cmpu.w 65535,$r9
 	bls L(L3)
-	move.d $r12,$r13
+	move.d $r10,$r12
 
-	movu.w $r11,$r9
+;; Nope, have to do all the parts of a 32-bit multiplication.
+;; See head comment in optabs.c:expand_doubleword_mult.
+
+	move.d $r10,$r13
+	movu.w $r11,$r9 ; ab*cd = (a*d + b*c)<<16 + b*d
 	lslq 16,$r13
-	mstep $r9,$r13
+	mstep $r9,$r13	; d*b
 	mstep $r9,$r13
 	mstep $r9,$r13
 	mstep $r9,$r13
@@ -140,7 +146,7 @@ ___Mul:
 	mstep $r9,$r13
 	clear.w $r10
 	test.d $r10
-	mstep $r9,$r10
+	mstep $r9,$r10	; d*a
 	mstep $r9,$r10
 	mstep $r9,$r10
 	mstep $r9,$r10
@@ -157,10 +163,9 @@ ___Mul:
 	mstep $r9,$r10
 	mstep $r9,$r10
 	movu.w $r12,$r12
-	move.d $r11,$r9
-	clear.w $r9
-	test.d $r9
-	mstep $r12,$r9
+	clear.w $r11
+	move.d $r11,$r9 ; Doubles as a "test.d" preparing for the mstep.
+	mstep $r12,$r9	; b*c
 	mstep $r12,$r9
 	mstep $r12,$r9
 	mstep $r12,$r9
@@ -182,17 +187,24 @@ ___Mul:
 	add.d $r13,$r10
 
 L(L3):
-	move.d $r9,$r10
+;; Form the maximum in $r10, by knowing the minimum, $r9.
+;; (We don't know which one of $r10 or $r11 it is.)
+;; Check if the largest operand is still just 16 bits.
+
+	xor $r9,$r10
 	xor $r11,$r10
-	xor $r12,$r10
 	cmpu.w 65535,$r10
 	bls L(L5)
 	movu.w $r9,$r13
 
-	movu.w $r13,$r13
+;; We have ab*cd = (a*c)<<32 + (a*d + b*c)<<16 + b*d, but c==0
+;; so we only need (a*d)<<16 + b*d with d = $r13, ab = $r10.
+;; We drop the upper part of (a*d)<<16 as we're only doing a
+;; 32-bit-result multiplication.
+
 	move.d $r10,$r9
 	lslq 16,$r9
-	mstep $r13,$r9
+	mstep $r13,$r9	; b*d
 	mstep $r13,$r9
 	mstep $r13,$r9
 	mstep $r13,$r9
@@ -210,7 +222,7 @@ L(L3):
 	mstep $r13,$r9
 	clear.w $r10
 	test.d $r10
-	mstep $r13,$r10
+	mstep $r13,$r10	; a*d
 	mstep $r13,$r10
 	mstep $r13,$r10
 	mstep $r13,$r10
@@ -231,25 +243,27 @@ L(L3):
 	add.d $r9,$r10
 
 L(L5):
-	movu.w $r9,$r9
+;; We have ab*cd = (a*c)<<32 + (a*d + b*c)<<16 + b*d, but a and c==0
+;; so b*d (with b=$r13, d=$r10) it is.
+
 	lslq 16,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
-	mstep $r9,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
+	mstep $r13,$r10
 	ret
-	mstep $r9,$r10
+	mstep $r13,$r10
 #endif
 L(Lfe1):
 	.size	___Mul,L(Lfe1)-___Mul
author	Hans-Peter Nilsson <hp@axis.com>	2013-10-16 01:43:14 +0000
committer	Hans-Peter Nilsson <hp@gcc.gnu.org>	2013-10-16 01:43:14 +0000
commit	0e499e759cbc393de610e7023c483d0ca0a8a7b0 (patch)
tree	c5d3a341da560ba98e56324e917e4ecf6c089531 /libgcc/config/cris/mulsi3.S
parent	b82d0df95b6592adfb3f8c4872fdad41c513e7c5 (diff)
download	gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.zip gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.gz gcc-0e499e759cbc393de610e7023c483d0ca0a8a7b0.tar.bz2