aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorg-Johann Lay <avr@gjlay.de>2011-07-28 08:03:07 +0000
committerGeorg-Johann Lay <gjl@gcc.gnu.org>2011-07-28 08:03:07 +0000
commit296799ba06ab9ff273038e9b4dd93ca9ba65bf1c (patch)
treef33ec938c92290f1cfa8c36ab509c9b6189d3216
parent2374a88acff100721223f7b944488e64279f0fb2 (diff)
downloadgcc-296799ba06ab9ff273038e9b4dd93ca9ba65bf1c.zip
gcc-296799ba06ab9ff273038e9b4dd93ca9ba65bf1c.tar.gz
gcc-296799ba06ab9ff273038e9b4dd93ca9ba65bf1c.tar.bz2
re PR target/49687 ([avr] Missed optimization for widening MUL)
PR target/49687 * config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit. Add _muluhisi3, _mulshisi3, _usmulhisi3. * config/avr/libgcc.S (__mulsi3): Rewrite. (__mulhisi3): Rewrite. (__umulhisi3): Rewrite. (__usmulhisi3): New. (__muluhisi3): New. (__mulshisi3): New. (__mulohisi3): New. (__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to declare. * config/avr/predicates.md (pseudo_register_operand): Rewrite. (pseudo_register_or_const_int_operand): New. (combine_pseudo_register_operand): New. (u16_operand): New. (s16_operand): New. (o16_operand): New. * config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI. * config/avr/avr.md (QIHI, QIHI2): New mode iterators. (any_extend, any_extend2): New code iterators. (extend_prefix): New code attribute. (mulsi3): Rewrite. Turn insn to expander. (mulhisi3): Ditto. (umulhisi3): Ditto. (usmulhisi3): New expander. (*mulsi3): New insn-and-split. (mulu<mode>si3): New insn-and-split. (muls<mode>si3): New insn-and-split. (mulohisi3): New insn-and-split. (*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3, *usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3, *sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3, *ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New insn-and-split. (*mulsi3_call): Rewrite. (*mulhisi3_call): Rewrite. (*umulhisi3_call): Rewrite. (*usmulhisi3_call): New insn. (*muluhisi3_call): New insn. (*mulshisi3_call): New insn. (*mulohisi3_call): New insn. (extendqihi2): Use combine_pseudo_register_operand as predicate for operand 1. (extendqisi2): Ditto. (zero_extendqihi2): Ditto. (zero_extendqisi2): Ditto. (zero_extendhisi2): Ditto. (extendhisi2): Ditto. Don't early-clobber operand 0. From-SVN: r176862
-rw-r--r--gcc/ChangeLog52
-rw-r--r--gcc/config/avr/avr.c28
-rw-r--r--gcc/config/avr/avr.md349
-rw-r--r--gcc/config/avr/libgcc.S403
-rwxr-xr-xgcc/config/avr/predicates.md43
-rw-r--r--gcc/config/avr/t-avr4
6 files changed, 637 insertions, 242 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2245872..3ac229f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,55 @@
+2011-07-28 Georg-Johann Lay <avr@gjlay.de>
+
+ PR target/49687
+ * config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit.
+ Add _muluhisi3, _mulshisi3, _usmulhisi3.
+ * config/avr/libgcc.S (__mulsi3): Rewrite.
+ (__mulhisi3): Rewrite.
+ (__umulhisi3): Rewrite.
+ (__usmulhisi3): New.
+ (__muluhisi3): New.
+ (__mulshisi3): New.
+ (__mulohisi3): New.
+ (__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to
+ declare.
+ * config/avr/predicates.md (pseudo_register_operand): Rewrite.
+ (pseudo_register_or_const_int_operand): New.
+ (combine_pseudo_register_operand): New.
+ (u16_operand): New.
+ (s16_operand): New.
+ (o16_operand): New.
+ * config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI.
+ * config/avr/avr.md (QIHI, QIHI2): New mode iterators.
+ (any_extend, any_extend2): New code iterators.
+ (extend_prefix): New code attribute.
+ (mulsi3): Rewrite. Turn insn to expander.
+ (mulhisi3): Ditto.
+ (umulhisi3): Ditto.
+ (usmulhisi3): New expander.
+ (*mulsi3): New insn-and-split.
+ (mulu<mode>si3): New insn-and-split.
+ (muls<mode>si3): New insn-and-split.
+ (mulohisi3): New insn-and-split.
+ (*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3,
+ *usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3,
+ *sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3,
+ *ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New
+ insn-and-split.
+ (*mulsi3_call): Rewrite.
+ (*mulhisi3_call): Rewrite.
+ (*umulhisi3_call): Rewrite.
+ (*usmulhisi3_call): New insn.
+ (*muluhisi3_call): New insn.
+ (*mulshisi3_call): New insn.
+ (*mulohisi3_call): New insn.
+ (extendqihi2): Use combine_pseudo_register_operand as predicate
+ for operand 1.
+ (extendqisi2): Ditto.
+ (zero_extendqihi2): Ditto.
+ (zero_extendqisi2): Ditto.
+ (zero_extendhisi2): Ditto.
+ (extendhisi2): Ditto. Don't early-clobber operand 0.
+
2011-07-28 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.c (add->lea splitter): Add SWI mode to PLUS RTX.
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index eb1707e..d9ed224 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -5515,6 +5515,34 @@ avr_rtx_costs (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, int *total,
return false;
break;
+ case SImode:
+ if (AVR_HAVE_MUL)
+ {
+ if (!speed)
+ {
+ /* Add some additional costs besides CALL like moves etc. */
+
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+ }
+ else
+ {
+ /* Just a rough estimate. Even with -O2 we don't want bulky
+ code expanded inline. */
+
+ *total = COSTS_N_INSNS (25);
+ }
+ }
+ else
+ {
+ if (speed)
+ *total = COSTS_N_INSNS (300);
+ else
+ /* Add some additional costs besides CALL like moves etc. */
+ *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+ }
+
+ return true;
+
default:
return false;
}
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 66c3db2..3f3bb6a 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -127,12 +127,25 @@
(const_int 2))]
(const_int 2)))
-;; Define mode iterator
+;; Define mode iterators
+(define_mode_iterator QIHI [(QI "") (HI "")])
+(define_mode_iterator QIHI2 [(QI "") (HI "")])
(define_mode_iterator QISI [(QI "") (HI "") (SI "")])
(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
(define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
(define_mode_iterator HISI [(HI "") (SI "")])
+;; Define code iterators
+;; Define two incarnations so that we can build the cross product.
+(define_code_iterator any_extend [sign_extend zero_extend])
+(define_code_iterator any_extend2 [sign_extend zero_extend])
+
+;; Define code attributes
+(define_code_attr extend_prefix
+ [(sign_extend "s")
+ (zero_extend "u")])
+
+
;;========================================================================
;; The following is used by nonlocal_goto and setjmp.
;; The receiver pattern will create no instructions since internally
@@ -1350,69 +1363,310 @@
;; Operand 2 (reg:SI 18) not clobbered on the enhanced core.
;; All call-used registers clobbered otherwise - normal library call.
+;; To support widening multiplication with constant we postpone
+;; expanding to the implicit library call until post combine and
+;; prior to register allocation. Clobber all hard registers that
+;; might be used by the (widening) multiply until it is split and
+;; its final register footprint is worked out.
+
(define_expand "mulsi3"
- [(set (reg:SI 22) (match_operand:SI 1 "register_operand" ""))
- (set (reg:SI 18) (match_operand:SI 2 "register_operand" ""))
- (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))])
- (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))]
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
- "")
+ {
+ if (u16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
-(define_insn "*mulsi3_call"
- [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))]
- "AVR_HAVE_MUL"
- "%~call __mulsi3"
- [(set_attr "type" "xcall")
- (set_attr "cc" "clobber")])
+ if (o16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+ })
-(define_expand "mulhisi3"
- [(set (reg:HI 18)
- (match_operand:HI 1 "register_operand" ""))
- (set (reg:HI 20)
- (match_operand:HI 2 "register_operand" ""))
+(define_insn_and_split "*mulsi3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r")
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:SI 18)
+ (match_dup 1))
(set (reg:SI 22)
- (mult:SI (sign_extend:SI (reg:HI 18))
- (sign_extend:SI (reg:HI 20))))
- (set (match_operand:SI 0 "register_operand" "")
+ (match_dup 2))
+ (parallel [(set (reg:SI 22)
+ (mult:SI (reg:SI 22)
+ (reg:SI 18)))
+ (clobber (reg:HI 26))])
+ (set (match_dup 0)
+ (reg:SI 22))]
+ {
+ if (u16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+
+ if (o16_operand (operands[2], SImode))
+ {
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+ DONE;
+ }
+ })
+
+;; "muluqisi3"
+;; "muluhisi3"
+(define_insn_and_split "mulu<mode>si3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:HI 26)
+ (match_dup 1))
+ (set (reg:SI 18)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (zero_extend:SI (reg:HI 26))
+ (reg:SI 18)))
+ (set (match_dup 0)
+ (reg:SI 22))]
+ {
+ /* Do the QI -> HI extension explicitly before the multiplication. */
+ /* Do the HI -> SI extension implicitly and after the multiplication. */
+
+ if (QImode == <MODE>mode)
+ operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]);
+
+ if (u16_operand (operands[2], SImode))
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+ emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ })
+
+;; "mulsqisi3"
+;; "mulshisi3"
+(define_insn_and_split "muls<mode>si3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:HI 26)
+ (match_dup 1))
+ (set (reg:SI 18)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (sign_extend:SI (reg:HI 26))
+ (reg:SI 18)))
+ (set (match_dup 0)
(reg:SI 22))]
+ {
+ /* Do the QI -> HI extension explicitly before the multiplication. */
+ /* Do the HI -> SI extension implicitly and after the multiplication. */
+
+ if (QImode == <MODE>mode)
+ operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]);
+
+ if (u16_operand (operands[2], SImode)
+ || s16_operand (operands[2], SImode))
+ {
+ rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+
+ operands[1] = force_reg (HImode, operands[1]);
+
+ if (u16_operand (operands[2], SImode))
+ emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1]));
+ else
+ emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2));
+
+ DONE;
+ }
+ })
+
+;; One-extend operand 1
+
+(define_insn_and_split "mulohisi3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (not:SI (zero_extend:SI
+ (not:HI (match_operand:HI 1 "pseudo_register_operand" "r"))))
+ (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
+ [(set (reg:HI 26)
+ (match_dup 1))
+ (set (reg:SI 18)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+ (reg:SI 18)))
+ (set (match_dup 0)
+ (reg:SI 22))]
+ "")
+
+(define_expand "mulhisi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+ (clobber (reg:DI 18))])]
"AVR_HAVE_MUL"
"")
(define_expand "umulhisi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" ""))))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ "")
+
+(define_expand "usmulhisi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+ (clobber (reg:DI 18))])]
+ "AVR_HAVE_MUL"
+ "")
+
+;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3"
+;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3"
+;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3"
+;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3"
+(define_insn_and_split
+ "*<any_extend:extend_prefix><any_extend2:extend_prefix>mul<QIHI:mode><QIHI2:mode>si3"
+ [(set (match_operand:SI 0 "pseudo_register_operand" "=r")
+ (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+ (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r"))))
+ (clobber (reg:DI 18))]
+ "AVR_HAVE_MUL && !reload_completed"
+ { gcc_unreachable(); }
+ "&& 1"
[(set (reg:HI 18)
- (match_operand:HI 1 "register_operand" ""))
- (set (reg:HI 20)
- (match_operand:HI 2 "register_operand" ""))
- (set (reg:SI 22)
- (mult:SI (zero_extend:SI (reg:HI 18))
- (zero_extend:SI (reg:HI 20))))
- (set (match_operand:SI 0 "register_operand" "")
+ (match_dup 1))
+ (set (reg:HI 26)
+ (match_dup 2))
+ (set (reg:SI 22)
+ (mult:SI (match_dup 3)
+ (match_dup 4)))
+ (set (match_dup 0)
(reg:SI 22))]
+ {
+ rtx xop1 = operands[1];
+ rtx xop2 = operands[2];
+
+ /* Do the QI -> HI extension explicitly before the multiplication. */
+ /* Do the HI -> SI extension implicitly and after the multiplication. */
+
+ if (QImode == <QIHI:MODE>mode)
+ xop1 = gen_rtx_fmt_e (<any_extend:CODE>, HImode, xop1);
+
+ if (QImode == <QIHI2:MODE>mode)
+ xop2 = gen_rtx_fmt_e (<any_extend2:CODE>, HImode, xop2);
+
+ if (<any_extend:CODE> == <any_extend2:CODE>
+ || <any_extend:CODE> == ZERO_EXTEND)
+ {
+ operands[1] = xop1;
+ operands[2] = xop2;
+ operands[3] = gen_rtx_fmt_e (<any_extend:CODE>, SImode, gen_rtx_REG (HImode, 18));
+ operands[4] = gen_rtx_fmt_e (<any_extend2:CODE>, SImode, gen_rtx_REG (HImode, 26));
+ }
+ else
+ {
+ /* <any_extend:CODE> = SIGN_EXTEND */
+ /* <any_extend2:CODE> = ZERO_EXTEND */
+
+ operands[1] = xop2;
+ operands[2] = xop1;
+ operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18));
+ operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26));
+ }
+ })
+
+(define_insn "*mulsi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (reg:SI 22)
+ (reg:SI 18)))
+ (clobber (reg:HI 26))]
"AVR_HAVE_MUL"
- "")
+ "%~call __mulsi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
(define_insn "*mulhisi3_call"
- [(set (reg:SI 22)
+ [(set (reg:SI 22)
(mult:SI (sign_extend:SI (reg:HI 18))
- (sign_extend:SI (reg:HI 20))))]
+ (sign_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __mulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
(define_insn "*umulhisi3_call"
- [(set (reg:SI 22)
+ [(set (reg:SI 22)
(mult:SI (zero_extend:SI (reg:HI 18))
- (zero_extend:SI (reg:HI 20))))]
+ (zero_extend:SI (reg:HI 26))))]
"AVR_HAVE_MUL"
"%~call __umulhisi3"
[(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
+(define_insn "*usmulhisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (zero_extend:SI (reg:HI 18))
+ (sign_extend:SI (reg:HI 26))))]
+ "AVR_HAVE_MUL"
+ "%~call __usmulhisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*muluhisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (zero_extend:SI (reg:HI 26))
+ (reg:SI 18)))]
+ "AVR_HAVE_MUL"
+ "%~call __muluhisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*mulshisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (sign_extend:SI (reg:HI 26))
+ (reg:SI 18)))]
+ "AVR_HAVE_MUL"
+ "%~call __mulshisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*mulohisi3_call"
+ [(set (reg:SI 22)
+ (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+ (reg:SI 18)))]
+ "AVR_HAVE_MUL"
+ "%~call __mulohisi3"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / %
; divmod
@@ -2400,9 +2654,16 @@
;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
;; sign extend
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends. A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't be combined to a widening
+;; multiplication. There is no need for combine to propagate hard registers,
+;; register allocation can do it just as well.
+
(define_insn "extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r,r")
- (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))]
+ (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %B0\;sbrc %0,7\;com %B0
@@ -2412,7 +2673,7 @@
(define_insn "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
- (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))]
+ (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0
@@ -2421,8 +2682,8 @@
(set_attr "cc" "set_n,set_n")])
(define_insn "extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=r,&r")
- (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))]
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r")))]
""
"@
clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0
@@ -2439,7 +2700,7 @@
(define_insn_and_split "zero_extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r")
- (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
@@ -2455,7 +2716,7 @@
(define_insn_and_split "zero_extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+ (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
@@ -2470,8 +2731,8 @@
})
(define_insn_and_split "zero_extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))]
""
"#"
"reload_completed"
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
index 7f3feeb..f6084a7 100644
--- a/gcc/config/avr/libgcc.S
+++ b/gcc/config/avr/libgcc.S
@@ -72,10 +72,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
.endm
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
- Multiplication 8 x 8
+ Multiplication 8 x 8 without MUL
*******************************************************/
#if defined (L_mulqi3)
@@ -83,9 +84,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define r_arg1 r24 /* multiplier */
#define r_res __tmp_reg__ /* result */
- .global __mulqi3
- .func __mulqi3
-__mulqi3:
+DEFUN __mulqi3
clr r_res ; clear result
__mulqi3_loop:
sbrc r_arg1,0
@@ -97,18 +96,16 @@ __mulqi3_loop:
__mulqi3_exit:
mov r_arg1,r_res ; result to return register
ret
+ENDF __mulqi3
#undef r_arg2
#undef r_arg1
#undef r_res
-.endfunc
#endif /* defined (L_mulqi3) */
#if defined (L_mulqihi3)
- .global __mulqihi3
- .func __mulqihi3
-__mulqihi3:
+;;; Sign-extend r24 into r25, prepare the second operand, then continue
+;;; in __mulhi3.
+;;; NOTE(review): the second operand's sequence ends in `dec r22'; a sign
+;;; extension of r22 into r23 would need `dec r23'.  The lines between the
+;;; two hunks are not visible here -- confirm against the full file.
+DEFUN __mulqihi3
clr r25
sbrc r24, 7
dec r25
@@ -116,21 +113,19 @@ __mulqihi3:
sbrc r22, 7
dec r22
rjmp __mulhi3
- .endfunc
+ENDF __mulqihi3
#endif /* defined (L_mulqihi3) */
#if defined (L_umulqihi3)
- .global __umulqihi3
- .func __umulqihi3
-__umulqihi3:
+DEFUN __umulqihi3
clr r25
clr r23
rjmp __mulhi3
- .endfunc
+ENDF __umulqihi3
#endif /* defined (L_umulqihi3) */
/*******************************************************
- Multiplication 16 x 16
+ Multiplication 16 x 16 without MUL
*******************************************************/
#if defined (L_mulhi3)
#define r_arg1L r24 /* multiplier Low */
@@ -140,9 +135,7 @@ __umulqihi3:
#define r_resL __tmp_reg__ /* result Low */
#define r_resH r21 /* result High */
- .global __mulhi3
- .func __mulhi3
-__mulhi3:
+DEFUN __mulhi3
clr r_resH ; clear result
clr r_resL ; clear result
__mulhi3_loop:
@@ -166,6 +159,7 @@ __mulhi3_exit:
mov r_arg1H,r_resH ; result to return register
mov r_arg1L,r_resL
ret
+ENDF __mulhi3
#undef r_arg1L
#undef r_arg1H
@@ -174,168 +168,51 @@ __mulhi3_exit:
#undef r_resL
#undef r_resH
-.endfunc
#endif /* defined (L_mulhi3) */
-#endif /* !defined (__AVR_HAVE_MUL__) */
/*******************************************************
- Widening Multiplication 32 = 16 x 16
+ Widening Multiplication 32 = 16 x 16 without MUL
*******************************************************/
-
+
#if defined (L_mulhisi3)
DEFUN __mulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
- ; C = (signed)A1 * (signed)B1
- muls A1, B1
- movw C2, R0
-
- ; C += A0 * B0
- mul A0, B0
- movw C0, R0
-
- ; C += (signed)A1 * B0
- mulsu A1, B0
- sbci C3, 0
- add C1, R0
- adc C2, R1
- clr __zero_reg__
- adc C3, __zero_reg__
-
- ; C += (signed)B1 * A0
- mulsu B1, A0
- sbci C3, 0
- XJMP __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- sbrc r23, 7
- dec r24
- mov r25, r24
- clr r20
- sbrc r19, 7
- dec r20
- mov r21, r20
- XJMP __mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ sbrc r23, 7
+ dec r24
+ mov r25, r24
+ clr r20
+ sbrc r19, 7
+ dec r20
+ mov r21, r20
+ XJMP __mulsi3
ENDF __mulhisi3
#endif /* defined (L_mulhisi3) */
#if defined (L_umulhisi3)
DEFUN __umulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
- ; C = A1 * B1
- mul A1, B1
- movw C2, R0
-
- ; C += A0 * B0
- mul A0, B0
- movw C0, R0
-
- ; C += A1 * B0
- mul A1, B0
- add C1, R0
- adc C2, R1
- clr __zero_reg__
- adc C3, __zero_reg__
-
- ; C += B1 * A0
- mul B1, A0
- XJMP __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
;;; FIXME: This is dead code (noone calls it)
- mov_l r18, r24
- mov_h r19, r25
- clr r24
- clr r25
- clr r20
- clr r21
- XJMP __mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+ mov_l r18, r24
+ mov_h r19, r25
+ clr r24
+ clr r25
+ mov_l r20, r24
+ mov_h r21, r25
+ XJMP __mulsi3
ENDF __umulhisi3
#endif /* defined (L_umulhisi3) */
-#if defined (L_xmulhisi3_exit)
-
-;;; Helper for __mulhisi3 resp. __umulhisi3.
-
-#define C0 22
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-DEFUN __xmulhisi3_exit
- add C1, R0
- adc C2, R1
- clr __zero_reg__
- adc C3, __zero_reg__
- ret
-ENDF __xmulhisi3_exit
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#endif /* defined (L_xmulhisi3_exit) */
-
#if defined (L_mulsi3)
/*******************************************************
- Multiplication 32 x 32
+ Multiplication 32 x 32 without MUL
*******************************************************/
#define r_arg1L r22 /* multiplier Low */
#define r_arg1H r23
#define r_arg1HL r24
#define r_arg1HH r25 /* multiplier High */
-
#define r_arg2L r18 /* multiplicand Low */
#define r_arg2H r19
#define r_arg2HL r20
@@ -346,43 +223,7 @@ ENDF __xmulhisi3_exit
#define r_resHL r30
#define r_resHH r31 /* result High */
-
- .global __mulsi3
- .func __mulsi3
-__mulsi3:
-#if defined (__AVR_HAVE_MUL__)
- mul r_arg1L, r_arg2L
- movw r_resL, r0
- mul r_arg1H, r_arg2H
- movw r_resHL, r0
- mul r_arg1HL, r_arg2L
- add r_resHL, r0
- adc r_resHH, r1
- mul r_arg1L, r_arg2HL
- add r_resHL, r0
- adc r_resHH, r1
- mul r_arg1HH, r_arg2L
- add r_resHH, r0
- mul r_arg1HL, r_arg2H
- add r_resHH, r0
- mul r_arg1H, r_arg2HL
- add r_resHH, r0
- mul r_arg1L, r_arg2HH
- add r_resHH, r0
- clr r_arg1HH ; use instead of __zero_reg__ to add carry
- mul r_arg1H, r_arg2L
- add r_resH, r0
- adc r_resHL, r1
- adc r_resHH, r_arg1HH ; add carry
- mul r_arg1L, r_arg2H
- add r_resH, r0
- adc r_resHL, r1
- adc r_resHH, r_arg1HH ; add carry
- movw r_arg1L, r_resL
- movw r_arg1HL, r_resHL
- clr r1 ; __zero_reg__ clobbered by "mul"
- ret
-#else
+DEFUN __mulsi3
clr r_resHH ; clear result
clr r_resHL ; clear result
clr r_resH ; clear result
@@ -414,13 +255,13 @@ __mulsi3_exit:
mov_h r_arg1H,r_resH
mov_l r_arg1L,r_resL
ret
-#endif /* defined (__AVR_HAVE_MUL__) */
+ENDF __mulsi3
+
#undef r_arg1L
#undef r_arg1H
#undef r_arg1HL
#undef r_arg1HH
-
#undef r_arg2L
#undef r_arg2H
#undef r_arg2HL
@@ -431,9 +272,181 @@ __mulsi3_exit:
#undef r_resHL
#undef r_resHH
-.endfunc
#endif /* defined (L_mulsi3) */
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 16
+*******************************************************/
+
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+;;; Computes the unsigned 16 x 16 product first and then corrects the
+;;; high word C3:C2 once for each operand whose sign bit is set.
+DEFUN __mulhisi3
+ XCALL __umulhisi3
+ ;; Sign-extend B
+ ;; If B is negative, subtract A from the high word of the product.
+ tst B1
+ brpl 1f
+ sub C2, A0
+ sbc C3, A1
+1: ;; Sign-extend A
+ ;; The shared tail subtracts B from the high word if A is negative.
+ XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+;;; Unsigned product followed by the sign correction for A in the tail.
+DEFUN __usmulhisi3
+ XCALL __umulhisi3
+ ;; FALLTHRU
+ ;; No ret here: execution continues into __usmulhisi3_tail below.
+ENDF __usmulhisi3
+
+;;; Tail shared with __mulhisi3 (which jumps here): if bit 7 of A1 is
+;;; set, subtract B1:B0 from the high word C3:C2; otherwise return at once.
+DEFUN __usmulhisi3_tail
+ ;; Sign-extend A
+ ;; sbrs skips the early ret when A is negative.
+ sbrs A1, 7
+ ret
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+;;; Builds the result from four 8 x 8 partial products; each mul leaves
+;;; its 16-bit result in r1:r0.
+DEFUN __umulhisi3
+ ;; A0*B0 goes to C1:C0 and A1*B1 to C3:C2.
+ mul A0, B0
+ movw C0, r0
+ mul A1, B1
+ movw C2, r0
+ ;; The two cross products are both added at C2:C1.  The rcall runs the
+ ;; accumulate sequence at label 1 for A0*B1; its ret comes back to the
+ ;; next mul, and the second pass through label 1 (for A1*B0) returns
+ ;; to the caller.
+ mul A0, B1
+ rcall 1f
+ mul A1, B0
+1: add C1, r0
+ adc C2, r1
+ ;; r1 held the high byte of the last mul result; clear it so it can be
+ ;; used as zero to propagate the carry into C3.
+ clr __zero_reg__
+ adc C3, __zero_reg__
+ ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+/*******************************************************
+ Widening Multiplication 32 = 16 x 32
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+;;; Dispatches on the sign bit of A: non-negative A is handled by
+;;; __muluhisi3, negative A by __mulohisi3 directly below.
+DEFUN __mulshisi3
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Some cores have problem skipping 2-word instruction
+ ;; so branch explicitly instead of skipping over the XJMP.
+ tst A1
+ brmi __mulohisi3
+#else
+ ;; Skip the XJMP when bit 7 of A1 is set and fall into __mulohisi3.
+ sbrs A1, 7
+#endif /* __AVR_HAVE_JMP_CALL__ */
+ XJMP __muluhisi3
+ ;; FALLTHRU
+ENDF __mulshisi3
+
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+;;; Takes the product with A zero-extended and then subtracts B1:B0 from
+;;; the high word C3:C2.
+DEFUN __mulohisi3
+ XCALL __muluhisi3
+ ;; One-extend R27:R26 (A1:A0)
+ sub C2, B0
+ sbc C3, B1
+ ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0
+;;; Clobbers: __tmp_reg__
+;;; __umulhisi3 supplies A1:A0 * B1:B0; the partial products involving
+;;; B2 and B3 are then added into the upper bytes C2 and C3.
+DEFUN __muluhisi3
+ XCALL __umulhisi3
+ ;; Only the low byte (r0) of A0*B3 and of A1*B2 is added, into C3.
+ mul A0, B3
+ add C3, r0
+ mul A1, B2
+ add C3, r0
+ ;; A0*B2 is added as a full 16-bit value into C3:C2.
+ mul A0, B2
+ add C2, r0
+ adc C3, r1
+ ;; Clear __zero_reg__ again after the mul instructions.
+ clr __zero_reg__
+ ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+ Multiplication 32 x 32
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0 * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+;;; Multiplies the low word of C by B via __muluhisi3, then adds the
+;;; partial products of the high word of C into C3:C2.
+DEFUN __mulsi3
+ ;; Low word of C becomes the 16-bit operand A; the high word is saved
+ ;; on the stack across the call.
+ movw A0, C0
+ push C2
+ push C3
+ XCALL __muluhisi3
+ ;; Pop in reverse push order: the old C3 into A1, the old C2 into A0.
+ pop A1
+ pop A0
+ ;; A1:A0 now contains the high word of A
+ ;; A0*B0 is added as 16 bits into C3:C2; of A0*B1 and A1*B0 only the
+ ;; low byte (r0) is added, into C3.
+ mul A0, B0
+ add C2, r0
+ adc C3, r1
+ mul A0, B1
+ add C3, r0
+ mul A1, B0
+ add C3, r0
+ ;; Clear __zero_reg__ again after the mul instructions.
+ clr __zero_reg__
+ ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
/*******************************************************
Division 8 / 8 => (result + remainder)
*******************************************************/
diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md
index 6646cb5..98262d5 100755
--- a/gcc/config/avr/predicates.md
+++ b/gcc/config/avr/predicates.md
@@ -155,10 +155,34 @@
(ior (match_test "register_operand (XEXP (op, 0), mode)")
(match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))"))))
+;; For some insns we must ensure that no hard register is inserted
+;; into their operands because the insns are split and the split
+;; involves hard registers. Examples are the divmod insns, which are
+;; split to insns that represent implicit library calls.
+
;; True for register that is pseudo register.
(define_predicate "pseudo_register_operand"
- (and (match_code "reg")
- (match_test "!HARD_REGISTER_P (op)")))
+ (and (match_operand 0 "register_operand")
+ (not (and (match_code "reg")
+ (match_test "HARD_REGISTER_P (op)")))))
+
+;; True for operand that is pseudo register or CONST_INT.
+(define_predicate "pseudo_register_or_const_int_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (match_operand 0 "pseudo_register_operand")))
+
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends. A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't combine to a widening
+;; multiplication. There is no need for combine to propagate or insert
+;; hard registers, register allocation can do it just as well.
+
+;; True for operand that is pseudo register at combine time.
+(define_predicate "combine_pseudo_register_operand"
+ (ior (match_operand 0 "pseudo_register_operand")
+ (and (match_operand 0 "register_operand")
+ (match_test "reload_completed || reload_in_progress"))))
;; Return true if OP is a constant integer that is either
;; 8 or 16 or 24.
@@ -189,3 +213,18 @@
(define_predicate "register_or_s9_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "s9_operand")))
+
+;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65535.
+(define_predicate "u16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)")))
+
+;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767.
+(define_predicate "s16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)")))
+
+;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1.
+(define_predicate "o16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)")))
diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr
index a5357f0..d79dd5a 100644
--- a/gcc/config/avr/t-avr
+++ b/gcc/config/avr/t-avr
@@ -41,7 +41,9 @@ LIB1ASMFUNCS = \
_mulhi3 \
_mulhisi3 \
_umulhisi3 \
- _xmulhisi3_exit \
+ _usmulhisi3 \
+ _muluhisi3 \
+ _mulshisi3 \
_mulsi3 \
_udivmodqi4 \
_divmodqi4 \