aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorBernd Schmidt <bernd.schmidt@analog.com>2007-04-12 13:39:35 +0000
committerBernd Schmidt <bernds@gcc.gnu.org>2007-04-12 13:39:35 +0000
commit3fbee523e265acd74b1484b9ec4ff0f809807e06 (patch)
treeb13beefca85f131d4dfe6524d6ccf3324a44b684 /gcc
parent9d3f9aa3a56dcce4ce7f84ec34ef3d7178c917e1 (diff)
downloadgcc-3fbee523e265acd74b1484b9ec4ff0f809807e06.zip
gcc-3fbee523e265acd74b1484b9ec4ff0f809807e06.tar.gz
gcc-3fbee523e265acd74b1484b9ec4ff0f809807e06.tar.bz2
lib1funcs.asm (___umulsi3_highpart, [...]): Use a more efficient implementation.
* config/bfin/lib1funcs.asm (___umulsi3_highpart, __smulsi3_highpart): Use a more efficient implementation. * config/bfin/bfin.md (umulsi3_highpart, smulsi3_highpart): Emit inline sequences when not optimizing for size. From-SVN: r123748
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog5
-rw-r--r--gcc/config/bfin/bfin.md120
-rw-r--r--gcc/config/bfin/lib1funcs.asm42
3 files changed, 107 insertions, 60 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 07f8a5b..ef344f0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -17,6 +17,11 @@
(flag_macinit1hi): Tighten constraints.
(flag_mul_macv2hi_parts_acconly): New pattern.
+ * config/bfin/lib1funcs.asm (___umulsi3_highpart, __smulsi3_highpart):
+ Use a more efficient implementation.
+ * config/bfin/bfin.md (umulsi3_highpart, smulsi3_highpart): Emit
+ inline sequences when not optimizing for size.
+
2007-02-11 Jie Zhang <jie.zhang@analog.com>
* config/bfin/bfin.opt (msim): New option.
(mcpu=): New option.
diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md
index e1eeaa6..ed0da5a 100644
--- a/gcc/config/bfin/bfin.md
+++ b/gcc/config/bfin/bfin.md
@@ -1451,42 +1451,102 @@
[(set_attr "type" "mult")])
(define_expand "umulsi3_highpart"
- [(set (match_operand:SI 0 "register_operand" "")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (zero_extend:DI
- (match_operand:SI 2 "register_operand" "")))
- (const_int 32))))]
- ""
-{
- rtx umulsi3_highpart_libfunc
- = init_one_libfunc ("__umulsi3_highpart");
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (reg:PDI REG_A0))
+ (clobber (reg:PDI REG_A1))])]
+ ""
+{
+ if (!optimize_size)
+ {
+ rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
+ rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, operands[1]),
+ gen_lowpart (HImode, operands[2]),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
+ gen_lowpart (V2HImode, operands[1]),
+ gen_lowpart (V2HImode, operands[2]),
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_FU),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_flag_machi_parts_acconly (a1reg,
+ gen_lowpart (V2HImode, operands[2]),
+ gen_lowpart (V2HImode, operands[1]),
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
+ }
+ else
+ {
+ rtx umulsi3_highpart_libfunc
+ = init_one_libfunc ("__umulsi3_highpart");
- emit_library_call_value (umulsi3_highpart_libfunc,
- operands[0], LCT_NORMAL, SImode,
- 2, operands[1], SImode, operands[2], SImode);
+ emit_library_call_value (umulsi3_highpart_libfunc,
+ operands[0], LCT_NORMAL, SImode,
+ 2, operands[1], SImode, operands[2], SImode);
+ }
DONE;
})
(define_expand "smulsi3_highpart"
- [(set (match_operand:SI 0 "register_operand" "")
- (truncate:SI
- (lshiftrt:DI
- (mult:DI (sign_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" ""))
- (sign_extend:DI
- (match_operand:SI 2 "register_operand" "")))
- (const_int 32))))]
- ""
-{
- rtx smulsi3_highpart_libfunc
- = init_one_libfunc ("__smulsi3_highpart");
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (reg:PDI REG_A0))
+ (clobber (reg:PDI REG_A1))])]
+ ""
+{
+ if (!optimize_size)
+ {
+ rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
+ rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
+ emit_insn (gen_flag_macinit1hi (a1reg,
+ gen_lowpart (HImode, operands[1]),
+ gen_lowpart (HImode, operands[2]),
+ GEN_INT (MACFLAG_FU)));
+ emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
+ gen_lowpart (V2HImode, operands[1]),
+ gen_lowpart (V2HImode, operands[2]),
+ const1_rtx, const1_rtx,
+ const1_rtx, const0_rtx, a1reg,
+ const0_rtx, GEN_INT (MACFLAG_IS),
+ GEN_INT (MACFLAG_IS_M)));
+ emit_insn (gen_flag_machi_parts_acconly (a1reg,
+ gen_lowpart (V2HImode, operands[2]),
+ gen_lowpart (V2HImode, operands[1]),
+ const1_rtx, const0_rtx,
+ a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M)));
+ emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16)));
+ emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
+ }
+ else
+ {
+ rtx smulsi3_highpart_libfunc
+ = init_one_libfunc ("__smulsi3_highpart");
- emit_library_call_value (smulsi3_highpart_libfunc,
- operands[0], LCT_NORMAL, SImode,
- 2, operands[1], SImode, operands[2], SImode);
+ emit_library_call_value (smulsi3_highpart_libfunc,
+ operands[0], LCT_NORMAL, SImode,
+ 2, operands[1], SImode, operands[2], SImode);
+ }
DONE;
})
diff --git a/gcc/config/bfin/lib1funcs.asm b/gcc/config/bfin/lib1funcs.asm
index 1d2db9b..fe4c3d5 100644
--- a/gcc/config/bfin/lib1funcs.asm
+++ b/gcc/config/bfin/lib1funcs.asm
@@ -123,17 +123,12 @@ ___umodsi3:
.type ___umulsi3_highpart, STT_FUNC;
___umulsi3_highpart:
- R2 = R1.H * R0.H, R3 = R1.L * R0.H (FU);
- R0 = R1.L * R0.L, R1 = R1.H * R0.L (FU);
- R0 >>= 16;
- /* Unsigned multiplication has the nice property that we can
- ignore carry on this first addition. */
- R0 = R0 + R3;
- R0 = R0 + R1;
- cc = ac0;
- R1 = cc;
- R1 = PACK(R1.l,R0.h);
- R0 = R1 + R2;
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
+ A1 += R0.L * R1.H (FU);
+ A1 = A1 >> 16;
+ R0 = (A0 += A1);
RTS;
#endif
@@ -143,24 +138,11 @@ ___umulsi3_highpart:
.type ___smulsi3_highpart, STT_FUNC;
___smulsi3_highpart:
- R2 = R1.L * R0.L (FU);
- R3 = R1.H * R0.L (IS,M);
- R0 = R0.H * R1.H, R1 = R0.H * R1.L (IS,M);
-
- R1.L = R2.H + R1.L;
- cc = ac0;
- R2 = cc;
-
- R1.L = R1.L + R3.L;
- cc = ac0;
- R1 >>>= 16;
- R3 >>>= 16;
- R1 = R1 + R3;
- R1 = R1 + R2;
- R2 = cc;
- R1 = R1 + R2;
-
- R0 = R0 + R1;
+ A1 = R1.L * R0.L (FU);
+ A1 = A1 >> 16;
+ A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
+ A1 += R1.H * R0.L (IS,M);
+ A1 = A1 >>> 16;
+ R0 = (A0 += A1);
RTS;
#endif
-