aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndy Hutchinson <hutchinsonandy@gcc.gnu.org>2010-01-08 23:01:45 +0000
committerAndy Hutchinson <hutchinsonandy@gcc.gnu.org>2010-01-08 23:01:45 +0000
commit58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5 (patch)
treef0cc63ddccf87079365955c57a69e269a70d2260
parent601fc26e4e7625131573450f11dfb7ae5f57ffeb (diff)
downloadgcc-58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5.zip
gcc-58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5.tar.gz
gcc-58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5.tar.bz2
re PR target/41885 (AVR Rotate patterns do not correctly consider overlap.)
2010-01-08 Andy Hutchinson <hutchinsonandy@gcc.gnu.org> PR target/41885 * avr.md (rotlqi3): Add CONST_INT_P check. (rotlhi3): Delete. (rotlhi3_8): Delete. (rotlsi3): Delete. (rotlsi3_8): Delete. (rotlsi3_16): Delete. (rotlsi3_24): Delete. (rotl<mode>3): New. (*rotw<mode>3): New. (*rotb<mode>3): New. * avr.c (avr_rotate_bytes): New function. * avr-protos.h (avr_rotate_bytes): New function. From-SVN: r155746
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/config/avr/avr-protos.h1
-rw-r--r--gcc/config/avr/avr.c136
-rw-r--r--gcc/config/avr/avr.md223
4 files changed, 210 insertions, 166 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b524bae..c634362 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2010-01-08 Andy Hutchinson <hutchinsonandy@gcc.gnu.org>
+
+ PR target/41885
+ * avr.md (rotlqi3): Add CONST_INT_P check.
+ (rotlhi3): Delete.
+ (rotlhi3_8): Delete.
+ (rotlsi3): Delete.
+ (rotlsi3_8): Delete.
+ (rotlsi3_16): Delete.
+ (rotlsi3_24): Delete.
+ (rotl<mode>3): New.
+ (*rotw<mode>3): New.
+ (*rotb<mode>3): New.
+ * avr.c (avr_rotate_bytes): New function.
+ * avr-protos.h (avr_rotate_bytes): New function.
+
2010-01-08 Steve Ellcey <sje@cup.hp.com>
PR target/37454
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ea4fe27..ea55a9b 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -82,6 +82,7 @@ extern const char *ashrsi3_out (rtx insn, rtx operands[], int *len);
extern const char *lshrqi3_out (rtx insn, rtx operands[], int *len);
extern const char *lshrhi3_out (rtx insn, rtx operands[], int *len);
extern const char *lshrsi3_out (rtx insn, rtx operands[], int *len);
+extern bool avr_rotate_bytes (rtx operands[]);
extern void expand_prologue (void);
extern void expand_epilogue (void);
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 83ee0ef..a5f3db8 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -4218,6 +4218,142 @@ lshrsi3_out (rtx insn, rtx operands[], int *len)
return "";
}
+/* Create RTL split patterns for byte sized rotate expressions. This
+ produces a series of move instructions and considers overlap situations.
+ Overlapping non-HImode operands need a scratch register. */
+
+bool
+avr_rotate_bytes (rtx operands[])
+{
+ int i, j;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
+ bool same_reg = rtx_equal_p (operands[0], operands[1]);
+ int num = INTVAL (operands[2]);
+ rtx scratch = operands[3];
+ /* Work out if byte or word move is needed. Odd byte rotates need QImode.
+ Word move if no scratch is needed, otherwise use size of scratch. */
+ enum machine_mode move_mode = QImode;
+ if (num & 0xf)
+ move_mode = QImode;
+ else if ((mode == SImode && !same_reg) || !overlapped)
+ move_mode = HImode;
+ else
+ move_mode = GET_MODE (scratch);
+
+ /* Force DI rotate to use QI moves since other DI moves are currently split
+ into QI moves so forward propagation works better. */
+ if (mode == DImode)
+ move_mode = QImode;
+ /* Make scratch smaller if needed. */
+ if (GET_MODE (scratch) == HImode && move_mode == QImode)
+ scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
+
+ int move_size = GET_MODE_SIZE (move_mode);
+ /* Number of bytes/words to rotate. */
+ int offset = (num >> 3) / move_size;
+ /* Number of moves needed. */
+ int size = GET_MODE_SIZE (mode) / move_size;
+ /* HImode byte swap is a special case to avoid a scratch register. */
+ if (mode == HImode && same_reg)
+ {
+ /* HImode byte swap, using xor. This is as quick as using scratch. */
+ rtx src, dst;
+ src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
+ dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
+ if (!rtx_equal_p (dst, src))
+ {
+ emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+ emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
+ emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+ }
+ }
+ else
+ {
+ /* Create linked list of moves to determine move order. */
+ struct {
+ rtx src, dst;
+ int links;
+ } move[size + 8];
+
+ /* Generate list of subreg moves. */
+ for (i = 0; i < size; i++)
+ {
+ int from = i;
+ int to = (from + offset) % size;
+ move[i].src = simplify_gen_subreg (move_mode, operands[1],
+ mode, from * move_size);
+ move[i].dst = simplify_gen_subreg (move_mode, operands[0],
+ mode, to * move_size);
+ move[i].links = -1;
+ }
+ /* Mark dependence where a dst of one move is the src of another move.
+ The first move is a conflict as it must wait until second is
+ performed. We ignore moves to self - we catch this later. */
+ if (overlapped)
+ for (i = 0; i < size; i++)
+ if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
+ for (j = 0; j < size; j++)
+ if (j != i && rtx_equal_p (move[j].src, move[i].dst))
+ {
+ /* The dst of move i is the src of move j. */
+ move[i].links = j;
+ break;
+ }
+
+ int blocked = -1;
+ int moves = 0;
+ /* Go through move list and perform non-conflicting moves. As each
+ non-overlapping move is made, it may remove other conflicts
+ so the process is repeated until no conflicts remain. */
+ do
+ {
+ blocked = -1;
+ moves = 0;
+ /* Emit move where dst is not also a src or we have used that
+ src already. */
+ for (i = 0; i < size; i++)
+ if (move[i].src != NULL_RTX)
+ if (move[i].links == -1 || move[move[i].links].src == NULL_RTX)
+ {
+ moves++;
+ /* Ignore NOP moves to self. */
+ if (!rtx_equal_p (move[i].dst, move[i].src))
+ emit_move_insn (move[i].dst, move[i].src);
+
+ /* Remove conflict from list. */
+ move[i].src = NULL_RTX;
+ }
+ else
+ blocked = i;
+
+ /* Check for deadlock. This is when no moves occurred and we have
+ at least one blocked move. */
+ if (moves == 0 && blocked != -1)
+ {
+ /* Need to use scratch register to break deadlock.
+ Add move to put dst of blocked move into scratch.
+ When this move occurs, it will break chain deadlock.
+ The scratch register is substituted for real move. */
+
+ move[size].src = move[blocked].dst;
+ move[size].dst = scratch;
+ /* Scratch move is never blocked. */
+ move[size].links = -1;
+ /* Make sure we have valid link. */
+ gcc_assert (move[blocked].links != -1);
+ /* Replace src of blocking move with scratch reg. */
+ move[move[blocked].links].src = scratch;
+ /* Make dependent on scratch move occurring. */
+ move[blocked].links = size;
+ size=size+1;
+ }
+ }
+ while (blocked != -1);
+ }
+ return true;
+}
+
/* Modifies the length assigned to instruction INSN
LEN is the initially computed length of the insn. */
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index f0e59eb..9b6b692 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -121,6 +121,8 @@
;; Define mode iterator
(define_mode_iterator QISI [(QI "") (HI "") (SI "")])
(define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
+(define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
+(define_mode_iterator HISI [(HI "") (SI "")])
;;========================================================================
;; The following is used by nonlocal_goto and setjmp.
@@ -1485,7 +1487,7 @@
""
"
{
- if (INTVAL (operands[2]) != 4)
+ if (!CONST_INT_P (operands[2]) || (INTVAL (operands[2]) != 4))
FAIL;
}")
@@ -1498,185 +1500,74 @@
[(set_attr "length" "1")
(set_attr "cc" "none")])
-(define_expand "rotlhi3"
- [(set (match_operand:HI 0 "register_operand" "")
- (rotate:HI (match_operand:HI 1 "register_operand" "")
- (match_operand:HI 2 "const_int_operand" "")))]
- ""
- "
-{
- if (INTVAL (operands[2]) != 8)
- FAIL;
-}")
+;; Split all rotates of HI,SI and DImode registers where rotation is by
+;; a whole number of bytes. The split creates the appropriate moves and
+;; considers all overlap situations. DImode is split before reload.
-(define_insn_and_split "*rotlhi3_8"
- [(set (match_operand:HI 0 "register_operand" "=r")
- (rotate:HI (match_operand:HI 1 "register_operand" "r")
- (const_int 8)))]
- ""
- "mov __tmp_reg__,%A0
- mov %A0,%B0
- mov %B0, __tmp_reg__"
- "reload_completed
- && REGNO (operands[0]) != REGNO (operands[1])"
- [(set (match_dup 2) (match_dup 5))
- (set (match_dup 3) (match_dup 4))]
- "operands[2] = gen_lowpart (QImode, operands[0]);
- operands[3] = gen_highpart (QImode, operands[0]);
-
- operands[4] = gen_lowpart (QImode, operands[1]);
- operands[5] = gen_highpart (QImode, operands[1]);"
- [(set_attr "length" "3")
- (set_attr "cc" "none")])
+;; HImode does not need scratch. Use attribute for this constraint.
+;; Use QI scratch for DI mode as this is often split into byte sized operands.
-(define_expand "rotlsi3"
- [(set (match_operand:SI 0 "register_operand" "")
- (rotate:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "const_int_operand" "")))]
+(define_mode_attr rotx [(DI "&r,&r,X") (SI "&r,&r,X") (HI "X,X,X")])
+(define_mode_attr rotsmode [(DI "QI") (SI "HI") (HI "QI")])
+
+(define_expand "rotl<mode>3"
+ [(parallel [(set (match_operand:HIDI 0 "register_operand" "")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "")
+ (match_operand:VOID 2 "const_int_operand" "")))
+ (clobber (match_operand 3 ""))])]
""
"
{
- if (INTVAL (operands[2]) != 8
- || INTVAL (operands[2]) != 16
- || INTVAL (operands[2]) != 24)
+ if (CONST_INT_P (operands[2]) && 0 == (INTVAL (operands[2]) % 8))
+ {
+ if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16)
+ operands[3] = gen_reg_rtx (<rotsmode>mode);
+ else
+ operands[3] = gen_reg_rtx (QImode);
+ }
+ else
FAIL;
}")
-(define_insn_and_split "*rotlsi3_16"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (rotate:SI (match_operand:SI 1 "register_operand" "r")
- (const_int 16)))]
- ""
- "{mov __tmp_reg__,%A1\;mov %A0,%C1\;mov %C0, __tmp_reg__\;mov __tmp_reg__,%B1\;mov %B0,%D1\;mov %D0, __tmp_reg__|movw __tmp_reg__,%A1\;movw %A0,%C1\;movw %C0, __tmp_reg__\;clr __zero_reg__}"
- "reload_completed
- && REGNO (operands[0]) != REGNO (operands[1])"
- [(set (match_dup 2) (match_dup 5))
- (set (match_dup 3) (match_dup 4))]
- "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode);
- unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode);
- operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off);
- operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off);
+;; Overlapping non-HImode registers often (but not always) need a scratch.
+;; The best we can do is use early clobber alternative "#&r" so that
+;; completely non-overlapping operands don't get a scratch but # so register
+;; allocation does not prefer non-overlapping.
- operands[4] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off);
- operands[5] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off);
- if (REGNO (operands[0]) == REGNO(operands[1]) + 2)
- {
- emit_move_insn (operands[3], operands[4]);
- DONE;
- }
- else if (REGNO (operands[0]) == REGNO(operands[1]) - 2)
- {
- emit_move_insn (operands[2], operands[5]);
- DONE;
- }"
- [(set (attr "length") (if_then_else (eq_attr "mcu_have_movw" "yes")
- (const_int 4)
- (const_int 6)))
- (set (attr "cc") (if_then_else (eq_attr "mcu_have_movw" "yes")
- (const_string "clobber")
- (const_string "none")))])
-
-(define_insn_and_split "*rotlsi3_8"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (rotate:SI (match_operand:SI 1 "register_operand" "r")
- (const_int 8)))]
- ""
- "mov __tmp_reg__,%D1
- mov %D0,%C1
- mov %C0,%B1
- mov %B0,%A1
- mov %A0, __tmp_reg__"
- "reload_completed
- && REGNO (operands[0]) != REGNO (operands[1])"
+; Split word aligned rotates using scratch that is mode dependent.
+(define_insn_and_split "*rotw<mode>"
+ [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+ (match_operand 2 "immediate_operand" "n,n,n")))
+ (clobber (match_operand:<rotsmode> 3 "register_operand" "=<rotx>" ))]
+ "(CONST_INT_P (operands[2]) &&
+ (0 == (INTVAL (operands[2]) % 16) && AVR_HAVE_MOVW))"
+ "#"
+ "&& (reload_completed || <MODE>mode == DImode)"
[(const_int 0)]
- "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode);
- unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode);
- unsigned int hi_lo_off = subreg_lowpart_offset (QImode, HImode);
- unsigned int hi_hi_off = subreg_highpart_offset (QImode, HImode);
-
- operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off);
- operands[4] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off);
- operands[3] = simplify_gen_subreg (QImode, operands[2], HImode, hi_hi_off);
- operands[2] = simplify_gen_subreg (QImode, operands[2], HImode, hi_lo_off);
- operands[5] = simplify_gen_subreg (QImode, operands[4], HImode, hi_hi_off);
- operands[4] = simplify_gen_subreg (QImode, operands[4], HImode, hi_lo_off);
-
- operands[6] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off);
- operands[8] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off);
- operands[7] = simplify_gen_subreg (QImode, operands[6], HImode, hi_hi_off);
- operands[6] = simplify_gen_subreg (QImode, operands[6], HImode, hi_lo_off);
- operands[9] = simplify_gen_subreg (QImode, operands[8], HImode, hi_hi_off);
- operands[8] = simplify_gen_subreg (QImode, operands[8], HImode, hi_lo_off);
-
- if (REGNO (operands[0]) < REGNO(operands[1]))
- {
- emit_move_insn (operands[2], operands[9]);
- emit_move_insn (operands[3], operands[6]);
- emit_move_insn (operands[4], operands[7]);
- emit_move_insn (operands[5], operands[8]);
- }
- else
- {
- emit_move_insn (operands[5], operands[8]);
- emit_move_insn (operands[2], operands[9]);
- emit_move_insn (operands[4], operands[7]);
- emit_move_insn (operands[3], operands[6]);
- }
- DONE;"
- [(set_attr "length" "5")
- (set_attr "cc" "none")])
+ "avr_rotate_bytes (operands);
+ DONE;"
+)
-(define_insn_and_split "*rotlsi3_24"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (rotate:SI (match_operand:SI 1 "register_operand" "r")
- (const_int 24)))]
- ""
- "mov __tmp_reg__,%A1
- mov %A0,%B1
- mov %B0,%C1
- mov %C0,%D1
- mov %D0, __tmp_reg__"
- "reload_completed
- && REGNO (operands[0]) != REGNO (operands[1])"
+
+; Split byte aligned rotates using scratch that is always QI mode.
+(define_insn_and_split "*rotb<mode>"
+ [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r")
+ (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r")
+ (match_operand 2 "immediate_operand" "n,n,n")))
+ (clobber (match_operand:QI 3 "register_operand" "=<rotx>" ))]
+ "(CONST_INT_P (operands[2]) &&
+ (8 == (INTVAL (operands[2]) % 16)
+ || (!AVR_HAVE_MOVW && 0 == (INTVAL (operands[2]) % 16))))"
+ "#"
+ "&& (reload_completed || <MODE>mode == DImode)"
[(const_int 0)]
- "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode);
- unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode);
- unsigned int hi_lo_off = subreg_lowpart_offset (QImode, HImode);
- unsigned int hi_hi_off = subreg_highpart_offset (QImode, HImode);
-
- operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off);
- operands[4] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off);
- operands[3] = simplify_gen_subreg (QImode, operands[2], HImode, hi_hi_off);
- operands[2] = simplify_gen_subreg (QImode, operands[2], HImode, hi_lo_off);
- operands[5] = simplify_gen_subreg (QImode, operands[4], HImode, hi_hi_off);
- operands[4] = simplify_gen_subreg (QImode, operands[4], HImode, hi_lo_off);
-
- operands[6] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off);
- operands[8] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off);
- operands[7] = simplify_gen_subreg (QImode, operands[6], HImode, hi_hi_off);
- operands[6] = simplify_gen_subreg (QImode, operands[6], HImode, hi_lo_off);
- operands[9] = simplify_gen_subreg (QImode, operands[8], HImode, hi_hi_off);
- operands[8] = simplify_gen_subreg (QImode, operands[8], HImode, hi_lo_off);
-
- if (REGNO (operands[0]) < REGNO(operands[1]))
- {
- emit_move_insn (operands[2], operands[7]);
- emit_move_insn (operands[5], operands[6]);
- emit_move_insn (operands[3], operands[8]);
- emit_move_insn (operands[4], operands[9]);
- }
- else
- {
- emit_move_insn (operands[5], operands[6]);
- emit_move_insn (operands[4], operands[9]);
- emit_move_insn (operands[3], operands[8]);
- emit_move_insn (operands[2], operands[7]);
- }
- DONE;"
- [(set_attr "length" "5")
- (set_attr "cc" "none")])
+ "avr_rotate_bytes (operands);
+ DONE;"
+)
+
;;<< << << << << << << << << << << << << << << << << << << << << << << << << <<
;; arithmetic shift left