aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/v850
diff options
context:
space:
mode:
author: Nick Clifton <nickc@redhat.com> 2001-01-06 00:46:33 +0000
committer: Nick Clifton <nickc@gcc.gnu.org> 2001-01-06 00:46:33 +0000
commit: 0426e0499783df11c3e046939f3edccd1546a60f (patch)
tree: 609a6abcb5a8f3faf747a0e22baa46caa67a31a7 /gcc/config/v850
parent: f7114e1750e9f65412cade6e7740b65cba27ac25 (diff)
download: gcc-0426e0499783df11c3e046939f3edccd1546a60f.zip
gcc-0426e0499783df11c3e046939f3edccd1546a60f.tar.gz
gcc-0426e0499783df11c3e046939f3edccd1546a60f.tar.bz2
Faster __mulsi3 routine
From-SVN: r38736
Diffstat (limited to 'gcc/config/v850')
-rw-r--r-- gcc/config/v850/lib1funcs.asm 93
1 files changed, 48 insertions, 45 deletions
diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm
index a162421..9248acb 100644
--- a/gcc/config/v850/lib1funcs.asm
+++ b/gcc/config/v850/lib1funcs.asm
@@ -33,57 +33,60 @@ Boston, MA 02111-1307, USA. */
.type ___mulsi3,@function
/*
- * In order to not deal with negative numbers (mulh is a signed multiply
- * and we want an unsigned multiply, code the multiplication as a series
- * of 7 bit multiplies).
+ * #define SHIFT 12
+ * #define MASK ((1 << SHIFT) - 1)
+ *
+ * #define STEP(i, j) \
+ * ({ \
+ * short a_part = (a >> (i)) & MASK; \
+ * short b_part = (b >> (j)) & MASK; \
+ * int res = (((int)a_part) * ((int)b_part)); \
+ * res; \
+ * })
*
- * int __mulsi3 (unsigned a, unsigned b)
+ * int
+ * __mulsi3 (unsigned a, unsigned b)
* {
- * int i, j;
- * int ret = 0;
- *
- * for (i = 0; i < 32; i += 7)
- * {
- * short a_part = a & 0x7f;
- * unsigned b_tmp = b;
- * a >>= 7;
- *
- * for (j = 0; (i+j) < 32; j += 7)
- * {
- * short b_part = b_tmp & 0x7f;
- * ret += (((int)a_part) * ((int)b_part)) << (i+j);
- * b_tmp >>= 7;
- * }
- * }
- *
- * return ret;
+ * return STEP (0, 0) +
+ * ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+ * ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+ * << (2 * SHIFT));
* }
*/
___mulsi3:
- mov 0,r10 /* total */
- mov 0,r14 /* i = 0, index for multiply a's part */
- movea lo(31),r0,r16 /* upper bounds for loop */
-.L5:
- mov r7,r13 /* b_tmp = b */
- andi 0x7f,r6,r15 /* a_part = (a & 127) */
- shr 7,r6 /* a >>= 7 */
- mov r14,r12 /* i+j = i */
-.L9:
- andi 0x7f,r13,r11 /* b_part = (b_tmp & 127) */
- mulh r15,r11 /* ((int)a_part) * ((int)b_part) */
- shr 7,r13 /* b_tmp >>= 7 */
- shl r12,r11 /* (((int)a_part) * ((int)b_part)) << (i+j) */
- add r11,r10 /* ret += (((int)a_part) * ((int)b_part)) << (i+j) */
- add 7,r12 /* i+j += 7 */
- cmp r16,r12 /* i+j < 32 */
- ble .L9
-
- add 7,r14 /* i += 7 */
- cmp r16,r14 /* i < 32 */
- ble .L5
-
- jmp [r31] /* return */
+ mov r6,r13 /* r13 = a (first argument arrives in r6) */
+ movea lo(4095),r0,r16 /* r16 = MASK = (1 << SHIFT) - 1, SHIFT = 12 */
+ and r16,r13 /* r13 = a0 = a & MASK */
+ mov r7,r15 /* r15 = b (second argument arrives in r7) */
+ and r16,r15 /* r15 = b0 = b & MASK */
+ mov r13,r10 /* r10 accumulates the result; start from a0 */
+ mulh r15,r10 /* r10 = a0 * b0 = STEP (0, 0) */
+ shr 12,r6 /* r6 = a >> 12 */
+ mov r6,r14
+ and r16,r14 /* r14 = a1 = (a >> 12) & MASK */
+ mov r14,r11
+ mulh r15,r11 /* r11 = a1 * b0 = STEP (SHIFT, 0) */
+ shr 12,r7 /* r7 = b >> 12 */
+ mov r7,r12
+ and r16,r12 /* r12 = b1 = (b >> 12) & MASK */
+ shr 12,r7 /* r7 = b >> 24 */
+ and r16,r7 /* r7 = b2 = (b >> 24) & MASK */
+ mulh r13,r7 /* r7 = a0 * b2 = STEP (0, 2 * SHIFT) */
+ shr 12,r6 /* r6 = a >> 24 */
+ mulh r12,r13 /* r13 = a0 * b1 = STEP (0, SHIFT); a0 is no longer needed */
+ and r16,r6 /* r6 = a2 = (a >> 24) & MASK */
+ add r13,r11 /* r11 = STEP (SHIFT, 0) + STEP (0, SHIFT) */
+ shl 12,r11 /* ... << SHIFT */
+ add r11,r10 /* result += middle terms */
+ mov r14,r11 /* r11 = a1 (r11 is free again) */
+ mulh r12,r11 /* r11 = a1 * b1 = STEP (SHIFT, SHIFT) */
+ mulh r15,r6 /* r6 = a2 * b0 = STEP (2 * SHIFT, 0) */
+ add r11,r7 /* r7 = STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) */
+ add r6,r7 /* ... + STEP (2 * SHIFT, 0) */
+ shl 24,r7 /* ... << (2 * SHIFT) */
+ add r7,r10 /* result += high terms */
+ jmp [r31] /* return; result is in r10 */
.size ___mulsi3,.-___mulsi3
#endif