aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorg-Johann Lay <avr@gjlay.de>2011-07-29 09:33:54 +0000
committerGeorg-Johann Lay <gjl@gcc.gnu.org>2011-07-29 09:33:54 +0000
commit6130646b8811ec5af4d58bd08fee7ebd8ffbc973 (patch)
tree6f22c438fb8d622f073b27db738c553a713ab2b9
parentf2b4b91a9e3b311038effb3b200ded7e6cdba777 (diff)
downloadgcc-6130646b8811ec5af4d58bd08fee7ebd8ffbc973.zip
gcc-6130646b8811ec5af4d58bd08fee7ebd8ffbc973.tar.gz
gcc-6130646b8811ec5af4d58bd08fee7ebd8ffbc973.tar.bz2
re PR target/49313 (Inefficient libgcc implementations for avr)
PR target/49313 * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction. (__ctzsi2): Result for 0 may be undefined. (__ctzhi2): Result for 0 may be undefined. (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail. (__popcountsi2): Ditto. And don't clobber r26. (__popcountdi2): Ditto. And don't clobber r27. * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum. (parityhi2): New expand. (paritysi2): New expand. (popcounthi2): New expand. (popcountsi2): New expand. (clzhi2): New expand. (clzsi2): New expand. (ctzhi2): New expand. (ctzsi2): New expand. (ffshi2): New expand. (ffssi2): New expand. (copysignsf3): New insn. (bswapsi2): New expand. (*parityhi2.libgcc): New insn. (*parityqihi2.libgcc): New insn. (*paritysihi2.libgcc): New insn. (*popcounthi2.libgcc): New insn. (*popcountsi2.libgcc): New insn. (*popcountqi2.libgcc): New insn. (*popcountqihi2.libgcc): New insn-and-split. (*clzhi2.libgcc): New insn. (*clzsihi2.libgcc): New insn. (*ctzhi2.libgcc): New insn. (*ctzsihi2.libgcc): New insn. (*ffshi2.libgcc): New insn. (*ffssihi2.libgcc): New insn. (*bswapsi2.libgcc): New insn. From-SVN: r176920
-rw-r--r--gcc/ChangeLog37
-rw-r--r--gcc/config/avr/avr.md290
-rw-r--r--gcc/config/avr/libgcc.S47
3 files changed, 355 insertions, 19 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a184cc2..2d12436 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,40 @@
+2011-07-29 Georg-Johann Lay <avr@gjlay.de>
+
+ PR target/49313
+ * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
+ (__ctzsi2): Result for 0 may be undefined.
+ (__ctzhi2): Result for 0 may be undefined.
+ (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
+ (__popcountsi2): Ditto. And don't clobber r26.
+ (__popcountdi2): Ditto. And don't clobber r27.
+ * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
+ (parityhi2): New expand.
+ (paritysi2): New expand.
+ (popcounthi2): New expand.
+ (popcountsi2): New expand.
+ (clzhi2): New expand.
+ (clzsi2): New expand.
+ (ctzhi2): New expand.
+ (ctzsi2): New expand.
+ (ffshi2): New expand.
+ (ffssi2): New expand.
+ (copysignsf3): New insn.
+ (bswapsi2): New expand.
+ (*parityhi2.libgcc): New insn.
+ (*parityqihi2.libgcc): New insn.
+ (*paritysihi2.libgcc): New insn.
+ (*popcounthi2.libgcc): New insn.
+ (*popcountsi2.libgcc): New insn.
+ (*popcountqi2.libgcc): New insn.
+ (*popcountqihi2.libgcc): New insn-and-split.
+ (*clzhi2.libgcc): New insn.
+ (*clzsihi2.libgcc): New insn.
+ (*ctzhi2.libgcc): New insn.
+ (*ctzsihi2.libgcc): New insn.
+ (*ffshi2.libgcc): New insn.
+ (*ffssihi2.libgcc): New insn.
+ (*bswapsi2.libgcc): New insn.
+
2011-07-29 Richard Guenther <rguenther@suse.de>
* tree-vrp.c (get_value_range): Only set parameter default
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index a14d96d..2c215fd 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -55,6 +55,7 @@
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
+ UNSPEC_COPYSIGN
])
(define_c_enum "unspecv"
@@ -3941,6 +3942,295 @@
[(set_attr "length" "9")
(set_attr "cc" "clobber")])
+
+;; Parity
+
+(define_expand "parityhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (parity:HI (reg:HI 24)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "paritysi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (parity:HI (reg:SI 22)))
+ (set (match_dup 2)
+ (reg:HI 24))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_dup 2)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (HImode);
+ })
+
+(define_insn "*parityhi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:HI 24)))]
+ ""
+ "%~call __parityhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*parityqihi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:QI 24)))]
+ ""
+ "%~call __parityqi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*paritysihi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:SI 22)))]
+ ""
+ "%~call __paritysi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+
+;; Popcount
+
+(define_expand "popcounthi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (popcount:HI (reg:HI 24)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "popcountsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (popcount:HI (reg:SI 22)))
+ (set (match_dup 2)
+ (reg:HI 24))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_dup 2)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (HImode);
+ })
+
+(define_insn "*popcounthi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:HI 24)))]
+ ""
+ "%~call __popcounthi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*popcountsi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:SI 22)))]
+ ""
+ "%~call __popcountsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*popcountqi2.libgcc"
+ [(set (reg:QI 24)
+ (popcount:QI (reg:QI 24)))]
+ ""
+ "%~call __popcountqi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*popcountqihi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:QI 24)))]
+ ""
+ "#"
+ ""
+ [(set (reg:QI 24)
+ (popcount:QI (reg:QI 24)))
+ (set (reg:QI 25)
+ (const_int 0))]
+ "")
+
+;; Count Leading Zeros
+
+(define_expand "clzhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (clz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "clzsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (clz:HI (reg:SI 22)))
+ (clobber (reg:QI 26))])
+ (set (match_dup 2)
+ (reg:HI 24))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_dup 2)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (HImode);
+ })
+
+(define_insn "*clzhi2.libgcc"
+ [(set (reg:HI 24)
+ (clz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __clzhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*clzsihi2.libgcc"
+ [(set (reg:HI 24)
+ (clz:HI (reg:SI 22)))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __clzsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Count Trailing Zeros
+
+(define_expand "ctzhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ctz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "ctzsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ctz:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])
+ (set (match_dup 2)
+ (reg:HI 24))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_dup 2)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (HImode);
+ })
+
+(define_insn "*ctzhi2.libgcc"
+ [(set (reg:HI 24)
+ (ctz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ctzhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*ctzsihi2.libgcc"
+ [(set (reg:HI 24)
+ (ctz:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ctzsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Find First Set
+
+(define_expand "ffshi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ffs:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "ffssi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ffs:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])
+ (set (match_dup 2)
+ (reg:HI 24))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_dup 2)))]
+ ""
+ {
+ operands[2] = gen_reg_rtx (HImode);
+ })
+
+(define_insn "*ffshi2.libgcc"
+ [(set (reg:HI 24)
+ (ffs:HI (reg:HI 24)))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ffshi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*ffssihi2.libgcc"
+ [(set (reg:HI 24)
+ (ffs:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ffssi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Copysign
+
+(define_insn "copysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "r")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "bst %D2,7\;bld %D0,7"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+;; Swap Bytes (change byte-endianness)
+
+(define_expand "bswapsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:SI 22)
+ (bswap:SI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (reg:SI 22))]
+ ""
+ "")
+
+(define_insn "*bswapsi2.libgcc"
+ [(set (reg:SI 22)
+ (bswap:SI (reg:SI 22)))]
+ ""
+ "%~call __bswapsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+
;; CPU instructions
;; NOP taking 1 or 2 Ticks
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
index d3d6e4f..f6084a7 100644
--- a/gcc/config/avr/libgcc.S
+++ b/gcc/config/avr/libgcc.S
@@ -1074,9 +1074,15 @@ ENDF __ffssi2
;; clobbers: r26
DEFUN __ffshi2
clr r26
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Some cores have problems skipping a 2-word instruction
+ tst r24
+ breq 2f
+#else
cpse r24, __zero_reg__
+#endif /* __AVR_HAVE_JMP_CALL__ */
1: XJMP __loop_ffsqi2
- ldi r26, 8
+2: ldi r26, 8
or r24, r25
brne 1b
ret
@@ -1106,12 +1112,12 @@ ENDF __loop_ffsqi2
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
-;; ctz(0) = 32
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
XCALL __ffssi2
dec r24
- sbrc r24, 7
- ldi r24, 32
ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
@@ -1119,12 +1125,12 @@ ENDF __ctzsi2
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
-;; ctz(0) = 16
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
XCALL __ffshi2
dec r24
- sbrc r24, 7
- ldi r24, 16
ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
@@ -1258,47 +1264,50 @@ ENDF __parityqi2
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: r30, __tmp_reg__
+;; clobbers: __tmp_reg__
DEFUN __popcounthi2
XCALL __popcountqi2
- mov r30, r24
+ push r24
mov r24, r25
XCALL __popcountqi2
- add r24, r30
clr r25
- ret
+ ;; FALLTHRU
ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: r26, r30, __tmp_reg__
+;; clobbers: __tmp_reg__
DEFUN __popcountsi2
XCALL __popcounthi2
- mov r26, r24
+ push r24
mov_l r24, r22
mov_h r25, r23
XCALL __popcounthi2
- add r24, r26
- ret
+ XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
-;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__
+;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
XCALL __popcountsi2
- mov r27, r24
+ push r24
mov_l r22, r18
mov_h r23, r19
mov_l r24, r20
mov_h r25, r21
XCALL __popcountsi2
- add r24, r27
- ret
+ XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */