From 0426e0499783df11c3e046939f3edccd1546a60f Mon Sep 17 00:00:00 2001
From: Nick Clifton
Date: Sat, 6 Jan 2001 00:46:33 +0000
Subject: Faster __mulsi routine

From-SVN: r38736
---
 gcc/config/v850/lib1funcs.asm | 93 ++++++++++++++++++++++--------------------
 1 file changed, 48 insertions(+), 45 deletions(-)

(limited to 'gcc/config/v850')

diff --git a/gcc/config/v850/lib1funcs.asm b/gcc/config/v850/lib1funcs.asm
index a162421..9248acb 100644
--- a/gcc/config/v850/lib1funcs.asm
+++ b/gcc/config/v850/lib1funcs.asm
@@ -33,57 +33,60 @@ Boston, MA 02111-1307, USA. */
 	.type ___mulsi3,@function
 
 /*
- * In order to not deal with negative numbers (mulh is a signed multiply
- * and we want an unsigned multiply, code the multiplication as a series
- * of 7 bit multiplies).
+ * #define SHIFT 12
+ * #define MASK ((1 << SHIFT) - 1)
+ *
+ * #define STEP(i, j) \
+ * ({ \
+ * short a_part = (a >> (i)) & MASK; \
+ * short b_part = (b >> (j)) & MASK; \
+ * int res = (((int)a_part) * ((int)b_part)); \
+ * res; \
+ * })
  *
- * int __mulsi3 (unsigned a, unsigned b)
+ * int
+ * __mulsi3 (unsigned a, unsigned b)
  * {
- * int i, j;
- * int ret = 0;
- *
- * for (i = 0; i < 32; i += 7)
- * {
- * short a_part = a & 0x7f;
- * unsigned b_tmp = b;
- * a >>= 7;
- *
- * for (j = 0; (i+j) < 32; j += 7)
- * {
- * short b_part = b_tmp & 0x7f;
- * ret += (((int)a_part) * ((int)b_part)) << (i+j);
- * b_tmp >>= 7;
- * }
- * }
- *
- * return ret;
+ * return STEP (0, 0) +
+ * ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
+ * ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
+ * << (2 * SHIFT));
  * }
  */
 
 ___mulsi3:
-	mov 0,r10 /* total */
-	mov 0,r14 /* i = 0, index for multiply a's part */
-	movea lo(31),r0,r16 /* upper bounds for loop */
-.L5:
-	mov r7,r13 /* b_tmp = b */
-	andi 0x7f,r6,r15 /* a_part = (a & 127) */
-	shr 7,r6 /* a >>= 7 */
-	mov r14,r12 /* i+j = i */
-.L9:
-	andi 0x7f,r13,r11 /* b_part = (b_tmp & 127) */
-	mulh r15,r11 /* ((int)a_part) * ((int)b_part) */
-	shr 7,r13 /* b_tmp >>= 7 */
-	shl r12,r11 /* (((int)a_part) * ((int)b_part)) << (i+j) */
-	add r11,r10 /* ret += (((int)a_part) * ((int)b_part)) << (i+j) */
-	add 7,r12 /* i+j += 7 */
-	cmp r16,r12 /* i+j < 32 */
-	ble .L9
-
-	add 7,r14 /* i += 7 */
-	cmp r16,r14 /* i < 32 */
-	ble .L5
-
-	jmp [r31] /* return */
+	mov r6,r13		/* r13 = a (first argument arrives in r6) */
+	movea lo(4095),r0,r16	/* r16 = MASK = 0xfff, kept for every chunk extraction */
+	and r16,r13		/* r13 = a0 = a & MASK */
+	mov r7,r15		/* r15 = b (second argument arrives in r7) */
+	and r16,r15		/* r15 = b0 = b & MASK */
+	mov r13,r10		/* r10 = a0, mulh overwrites its destination */
+	mulh r15,r10		/* r10 = a0 * b0 = STEP (0, 0), 12-bit chunks stay non-negative under the signed mulh */
+	shr 12,r6		/* r6 = a >> SHIFT */
+	mov r6,r14		/* copy so r6 can be shifted again later */
+	and r16,r14		/* r14 = a1 = (a >> SHIFT) & MASK */
+	mov r14,r11		/* r11 = a1, r14 keeps a1 for the a1 * b1 term */
+	mulh r15,r11		/* r11 = a1 * b0 = STEP (SHIFT, 0) */
+	shr 12,r7		/* r7 = b >> SHIFT */
+	mov r7,r12		/* copy so r7 can be shifted again */
+	and r16,r12		/* r12 = b1 = (b >> SHIFT) & MASK */
+	shr 12,r7		/* r7 = b >> (2 * SHIFT) */
+	and r16,r7		/* r7 = b2 = (b >> (2 * SHIFT)) & MASK */
+	mulh r13,r7		/* r7 = a0 * b2 = STEP (0, 2 * SHIFT) */
+	shr 12,r6		/* r6 = a >> (2 * SHIFT) */
+	mulh r12,r13		/* r13 = a0 * b1 = STEP (0, SHIFT), last use of a0 */
+	and r16,r6		/* r6 = a2 = (a >> (2 * SHIFT)) & MASK */
+	add r13,r11		/* r11 = STEP (SHIFT, 0) + STEP (0, SHIFT) */
+	shl 12,r11		/* r11 <<= SHIFT */
+	add r11,r10		/* total += middle partial products */
+	mov r14,r11		/* r11 = a1 */
+	mulh r12,r11		/* r11 = a1 * b1 = STEP (SHIFT, SHIFT) */
+	mulh r15,r6		/* r6 = a2 * b0 = STEP (2 * SHIFT, 0) */
+	add r11,r7		/* r7 = STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) */
+	add r6,r7		/* r7 += STEP (2 * SHIFT, 0) */
+	shl 24,r7		/* r7 <<= 2 * SHIFT, bits above 32 are discarded */
+	add r7,r10		/* total += high partial products */
+	jmp [r31]		/* return with the product in r10 */
 	.size ___mulsi3,.-___mulsi3
 #endif
 
-- 
cgit v1.1