diff options
author | Andy Hutchinson <hutchinsonandy@gcc.gnu.org> | 2010-01-08 23:01:45 +0000 |
---|---|---|
committer | Andy Hutchinson <hutchinsonandy@gcc.gnu.org> | 2010-01-08 23:01:45 +0000 |
commit | 58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5 (patch) | |
tree | f0cc63ddccf87079365955c57a69e269a70d2260 | |
parent | 601fc26e4e7625131573450f11dfb7ae5f57ffeb (diff) | |
download | gcc-58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5.zip gcc-58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5.tar.gz gcc-58f0ea2f8565b62d2060c2d244cc7d98b5b0a7a5.tar.bz2 |
re PR target/41885 (AVR Rotate patterns do not correctly consider overlap.)
2010-01-08 Andy Hutchinson <hutchinsonandy@gcc.gnu.org>
PR target/41885
* avr.md (rotlqi3): Add CONST_INT_P check.
(rotlhi3): Delete.
(rotlhi3_8): Delete.
(rotlsi3): Delete.
(rotlsi3_8): Delete.
(rotlsi3_16): Delete.
(rotlsi3_24): Delete.
(rotl<mode>3): New.
(*rotw<mode>3): New.
(*rotb<mode>3): New.
* avr.c (avr_rotate_bytes): New function.
* avr-protos.h (avr_rotate_bytes): New function.
From-SVN: r155746
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/config/avr/avr-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/avr/avr.c | 136 | ||||
-rw-r--r-- | gcc/config/avr/avr.md | 223 |
4 files changed, 210 insertions, 166 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b524bae..c634362 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2010-01-08 Andy Hutchinson <hutchinsonandy@gcc.gnu.org> + + PR target/41885 + * avr.md (rotlqi3): Add CONST_INT_P check. + (rotlhi3): Delete. + (rotlhi3_8): Delete. + (rotlsi3): Delete. + (rotlsi3_8): Delete. + (rotlsi3_16): Delete. + (rotlsi3_24): Delete. + (rotl<mode>3): New. + (*rotw<mode>3): New. + (*rotb<mode>3): New. + * avr.c (avr_rotate_bytes): New function. + * avr-proto.h (avr_rotate_bytes): New function. + 2010-01-08 Steve Ellcey <sje@cup.hp.com> PR target/37454 diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index ea4fe27..ea55a9b 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -82,6 +82,7 @@ extern const char *ashrsi3_out (rtx insn, rtx operands[], int *len); extern const char *lshrqi3_out (rtx insn, rtx operands[], int *len); extern const char *lshrhi3_out (rtx insn, rtx operands[], int *len); extern const char *lshrsi3_out (rtx insn, rtx operands[], int *len); +extern bool avr_rotate_bytes (rtx operands[]); extern void expand_prologue (void); extern void expand_epilogue (void); diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 83ee0ef..a5f3db8 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -4218,6 +4218,142 @@ lshrsi3_out (rtx insn, rtx operands[], int *len) return ""; } +/* Create RTL split patterns for byte sized rotate expressions. This + produces a series of move instructions and considers overlap situations. + Overlapping non-HImode operands need a scratch register. */ + +bool +avr_rotate_bytes (rtx operands[]) +{ + int i, j; + enum machine_mode mode = GET_MODE (operands[0]); + bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]); + bool same_reg = rtx_equal_p (operands[0], operands[1]); + int num = INTVAL (operands[2]); + rtx scratch = operands[3]; + /* Work out if byte or word move is needed. 
Odd byte rotates need QImode. + Word move if no scratch is needed, otherwise use size of scratch. */ + enum machine_mode move_mode = QImode; + if (num & 0xf) + move_mode = QImode; + else if ((mode == SImode && !same_reg) || !overlapped) + move_mode = HImode; + else + move_mode = GET_MODE (scratch); + + /* Force DI rotate to use QI moves since other DI moves are currently split + into QI moves so forward propagation works better. */ + if (mode == DImode) + move_mode = QImode; + /* Make scratch smaller if needed. */ + if (GET_MODE (scratch) == HImode && move_mode == QImode) + scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0); + + int move_size = GET_MODE_SIZE (move_mode); + /* Number of bytes/words to rotate. */ + int offset = (num >> 3) / move_size; + /* Number of moves needed. */ + int size = GET_MODE_SIZE (mode) / move_size; + /* Himode byte swap is special case to avoid a scratch register. */ + if (mode == HImode && same_reg) + { + /* HImode byte swap, using xor. This is as quick as using scratch. */ + rtx src, dst; + src = simplify_gen_subreg (move_mode, operands[1], mode, 0); + dst = simplify_gen_subreg (move_mode, operands[0], mode, 1); + if (!rtx_equal_p (dst, src)) + { + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + emit_move_insn (src, gen_rtx_XOR (QImode, src, dst)); + emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src)); + } + } + else + { + /* Create linked list of moves to determine move order. */ + struct { + rtx src, dst; + int links; + } move[size + 8]; + + /* Generate list of subreg moves. */ + for (i = 0; i < size; i++) + { + int from = i; + int to = (from + offset) % size; + move[i].src = simplify_gen_subreg (move_mode, operands[1], + mode, from * move_size); + move[i].dst = simplify_gen_subreg (move_mode, operands[0], + mode, to * move_size); + move[i].links = -1; + } + /* Mark dependence where a dst of one move is the src of another move. + The first move is a conflict as it must wait until second is + performed. 
We ignore moves to self - we catch this later. */ + if (overlapped) + for (i = 0; i < size; i++) + if (reg_overlap_mentioned_p (move[i].dst, operands[1])) + for (j = 0; j < size; j++) + if (j != i && rtx_equal_p (move[j].src, move[i].dst)) + { + /* The dst of move i is the src of move j. */ + move[i].links = j; + break; + } + + int blocked = -1; + int moves = 0; + /* Go through move list and perform non-conflicting moves. As each + non-overlapping move is made, it may remove other conflicts + so the process is repeated until no conflicts remain. */ + do + { + blocked = -1; + moves = 0; + /* Emit move where dst is not also a src or we have used that + src already. */ + for (i = 0; i < size; i++) + if (move[i].src != NULL_RTX) + if (move[i].links == -1 || move[move[i].links].src == NULL_RTX) + { + moves++; + /* Ignore NOP moves to self. */ + if (!rtx_equal_p (move[i].dst, move[i].src)) + emit_move_insn (move[i].dst, move[i].src); + + /* Remove conflict from list. */ + move[i].src = NULL_RTX; + } + else + blocked = i; + + /* Check for deadlock. This is when no moves occurred and we have + at least one blocked move. */ + if (moves == 0 && blocked != -1) + { + /* Need to use scratch register to break deadlock. + Add move to put dst of blocked move into scratch. + When this move occurs, it will break chain deadlock. + The scratch register is substituted for real move. */ + + move[size].src = move[blocked].dst; + move[size].dst = scratch; + /* Scratch move is never blocked. */ + move[size].links = -1; + /* Make sure we have valid link. */ + gcc_assert (move[blocked].links != -1); + /* Replace src of blocking move with scratch reg. */ + move[move[blocked].links].src = scratch; + /* Make dependent on scratch move occuring. */ + move[blocked].links = size; + size=size+1; + } + } + while (blocked != -1); + } + return true; +} + /* Modifies the length assigned to instruction INSN LEN is the initially computed length of the insn. 
*/ diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index f0e59eb..9b6b692 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -121,6 +121,8 @@ ;; Define mode iterator (define_mode_iterator QISI [(QI "") (HI "") (SI "")]) (define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")]) +(define_mode_iterator HIDI [(HI "") (SI "") (DI "")]) +(define_mode_iterator HISI [(HI "") (SI "")]) ;;======================================================================== ;; The following is used by nonlocal_goto and setjmp. @@ -1485,7 +1487,7 @@ "" " { - if (INTVAL (operands[2]) != 4) + if (!CONST_INT_P (operands[2]) || (INTVAL (operands[2]) != 4)) FAIL; }") @@ -1498,185 +1500,74 @@ [(set_attr "length" "1") (set_attr "cc" "none")]) -(define_expand "rotlhi3" - [(set (match_operand:HI 0 "register_operand" "") - (rotate:HI (match_operand:HI 1 "register_operand" "") - (match_operand:HI 2 "const_int_operand" "")))] - "" - " -{ - if (INTVAL (operands[2]) != 8) - FAIL; -}") +;; Split all rotates of HI,SI and DImode registers where rotation is by +;; a whole number of bytes. The split creates the appropriate moves and +;; considers all overlap situations. DImode is split before reload. -(define_insn_and_split "*rotlhi3_8" - [(set (match_operand:HI 0 "register_operand" "=r") - (rotate:HI (match_operand:HI 1 "register_operand" "r") - (const_int 8)))] - "" - "mov __tmp_reg__,%A0 - mov %A0,%B0 - mov %B0, __tmp_reg__" - "reload_completed - && REGNO (operands[0]) != REGNO (operands[1])" - [(set (match_dup 2) (match_dup 5)) - (set (match_dup 3) (match_dup 4))] - "operands[2] = gen_lowpart (QImode, operands[0]); - operands[3] = gen_highpart (QImode, operands[0]); - - operands[4] = gen_lowpart (QImode, operands[1]); - operands[5] = gen_highpart (QImode, operands[1]);" - [(set_attr "length" "3") - (set_attr "cc" "none")]) +;; HImode does not need scratch. Use attribute for this constraint. +;; Use QI scratch for DI mode as this is often split into byte sized operands. 
-(define_expand "rotlsi3" - [(set (match_operand:SI 0 "register_operand" "") - (rotate:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" "")))] +(define_mode_attr rotx [(DI "&r,&r,X") (SI "&r,&r,X") (HI "X,X,X")]) +(define_mode_attr rotsmode [(DI "QI") (SI "HI") (HI "QI")]) + +(define_expand "rotl<mode>3" + [(parallel [(set (match_operand:HIDI 0 "register_operand" "") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "") + (match_operand:VOID 2 "const_int_operand" ""))) + (clobber (match_operand 3 ""))])] "" " { - if (INTVAL (operands[2]) != 8 - || INTVAL (operands[2]) != 16 - || INTVAL (operands[2]) != 24) + if (CONST_INT_P (operands[2]) && 0 == (INTVAL (operands[2]) % 8)) + { + if (AVR_HAVE_MOVW && 0 == INTVAL (operands[2]) % 16) + operands[3] = gen_reg_rtx (<rotsmode>mode); + else + operands[3] = gen_reg_rtx (QImode); + } + else FAIL; }") -(define_insn_and_split "*rotlsi3_16" - [(set (match_operand:SI 0 "register_operand" "=r") - (rotate:SI (match_operand:SI 1 "register_operand" "r") - (const_int 16)))] - "" - "{mov __tmp_reg__,%A1\;mov %A0,%C1\;mov %C0, __tmp_reg__\;mov __tmp_reg__,%B1\;mov %B0,%D1\;mov %D0, __tmp_reg__|movw __tmp_reg__,%A1\;movw %A0,%C1\;movw %C0, __tmp_reg__\;clr __zero_reg__}" - "reload_completed - && REGNO (operands[0]) != REGNO (operands[1])" - [(set (match_dup 2) (match_dup 5)) - (set (match_dup 3) (match_dup 4))] - "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode); - unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode); - operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off); - operands[3] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off); +;; Overlapping non-HImode registers often (but not always) need a scratch. +;; The best we can do is use early clobber alternative "#&r" so that +;; completely non-overlapping operands dont get a scratch but # so register +;; allocation does not prefer non-overlapping. 
- operands[4] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off); - operands[5] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off); - if (REGNO (operands[0]) == REGNO(operands[1]) + 2) - { - emit_move_insn (operands[3], operands[4]); - DONE; - } - else if (REGNO (operands[0]) == REGNO(operands[1]) - 2) - { - emit_move_insn (operands[2], operands[5]); - DONE; - }" - [(set (attr "length") (if_then_else (eq_attr "mcu_have_movw" "yes") - (const_int 4) - (const_int 6))) - (set (attr "cc") (if_then_else (eq_attr "mcu_have_movw" "yes") - (const_string "clobber") - (const_string "none")))]) - -(define_insn_and_split "*rotlsi3_8" - [(set (match_operand:SI 0 "register_operand" "=r") - (rotate:SI (match_operand:SI 1 "register_operand" "r") - (const_int 8)))] - "" - "mov __tmp_reg__,%D1 - mov %D0,%C1 - mov %C0,%B1 - mov %B0,%A1 - mov %A0, __tmp_reg__" - "reload_completed - && REGNO (operands[0]) != REGNO (operands[1])" +; Split word aligned rotates using scratch that is mode dependent. 
+(define_insn_and_split "*rotw<mode>" + [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r") + (match_operand 2 "immediate_operand" "n,n,n"))) + (clobber (match_operand:<rotsmode> 3 "register_operand" "=<rotx>" ))] + "(CONST_INT_P (operands[2]) && + (0 == (INTVAL (operands[2]) % 16) && AVR_HAVE_MOVW))" + "#" + "&& (reload_completed || <MODE>mode == DImode)" [(const_int 0)] - "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode); - unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode); - unsigned int hi_lo_off = subreg_lowpart_offset (QImode, HImode); - unsigned int hi_hi_off = subreg_highpart_offset (QImode, HImode); - - operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off); - operands[4] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off); - operands[3] = simplify_gen_subreg (QImode, operands[2], HImode, hi_hi_off); - operands[2] = simplify_gen_subreg (QImode, operands[2], HImode, hi_lo_off); - operands[5] = simplify_gen_subreg (QImode, operands[4], HImode, hi_hi_off); - operands[4] = simplify_gen_subreg (QImode, operands[4], HImode, hi_lo_off); - - operands[6] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off); - operands[8] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off); - operands[7] = simplify_gen_subreg (QImode, operands[6], HImode, hi_hi_off); - operands[6] = simplify_gen_subreg (QImode, operands[6], HImode, hi_lo_off); - operands[9] = simplify_gen_subreg (QImode, operands[8], HImode, hi_hi_off); - operands[8] = simplify_gen_subreg (QImode, operands[8], HImode, hi_lo_off); - - if (REGNO (operands[0]) < REGNO(operands[1])) - { - emit_move_insn (operands[2], operands[9]); - emit_move_insn (operands[3], operands[6]); - emit_move_insn (operands[4], operands[7]); - emit_move_insn (operands[5], operands[8]); - } - else - { - emit_move_insn (operands[5], operands[8]); - emit_move_insn (operands[2], operands[9]); - 
emit_move_insn (operands[4], operands[7]); - emit_move_insn (operands[3], operands[6]); - } - DONE;" - [(set_attr "length" "5") - (set_attr "cc" "none")]) + "avr_rotate_bytes (operands); + DONE;" +) -(define_insn_and_split "*rotlsi3_24" - [(set (match_operand:SI 0 "register_operand" "=r") - (rotate:SI (match_operand:SI 1 "register_operand" "r") - (const_int 24)))] - "" - "mov __tmp_reg__,%A1 - mov %A0,%B1 - mov %B0,%C1 - mov %C0,%D1 - mov %D0, __tmp_reg__" - "reload_completed - && REGNO (operands[0]) != REGNO (operands[1])" + +; Split byte aligned rotates using scratch that is always QI mode. +(define_insn_and_split "*rotb<mode>" + [(set (match_operand:HIDI 0 "register_operand" "=r,r,#&r") + (rotate:HIDI (match_operand:HIDI 1 "register_operand" "0,r,r") + (match_operand 2 "immediate_operand" "n,n,n"))) + (clobber (match_operand:QI 3 "register_operand" "=<rotx>" ))] + "(CONST_INT_P (operands[2]) && + (8 == (INTVAL (operands[2]) % 16) + || (!AVR_HAVE_MOVW && 0 == (INTVAL (operands[2]) % 16))))" + "#" + "&& (reload_completed || <MODE>mode == DImode)" [(const_int 0)] - "unsigned int si_lo_off = subreg_lowpart_offset (HImode, SImode); - unsigned int si_hi_off = subreg_highpart_offset (HImode, SImode); - unsigned int hi_lo_off = subreg_lowpart_offset (QImode, HImode); - unsigned int hi_hi_off = subreg_highpart_offset (QImode, HImode); - - operands[2] = simplify_gen_subreg (HImode, operands[0], SImode, si_lo_off); - operands[4] = simplify_gen_subreg (HImode, operands[0], SImode, si_hi_off); - operands[3] = simplify_gen_subreg (QImode, operands[2], HImode, hi_hi_off); - operands[2] = simplify_gen_subreg (QImode, operands[2], HImode, hi_lo_off); - operands[5] = simplify_gen_subreg (QImode, operands[4], HImode, hi_hi_off); - operands[4] = simplify_gen_subreg (QImode, operands[4], HImode, hi_lo_off); - - operands[6] = simplify_gen_subreg (HImode, operands[1], SImode, si_lo_off); - operands[8] = simplify_gen_subreg (HImode, operands[1], SImode, si_hi_off); - operands[7] = 
simplify_gen_subreg (QImode, operands[6], HImode, hi_hi_off); - operands[6] = simplify_gen_subreg (QImode, operands[6], HImode, hi_lo_off); - operands[9] = simplify_gen_subreg (QImode, operands[8], HImode, hi_hi_off); - operands[8] = simplify_gen_subreg (QImode, operands[8], HImode, hi_lo_off); - - if (REGNO (operands[0]) < REGNO(operands[1])) - { - emit_move_insn (operands[2], operands[7]); - emit_move_insn (operands[5], operands[6]); - emit_move_insn (operands[3], operands[8]); - emit_move_insn (operands[4], operands[9]); - } - else - { - emit_move_insn (operands[5], operands[6]); - emit_move_insn (operands[4], operands[9]); - emit_move_insn (operands[3], operands[8]); - emit_move_insn (operands[2], operands[7]); - } - DONE;" - [(set_attr "length" "5") - (set_attr "cc" "none")]) + "avr_rotate_bytes (operands); + DONE;" +) + ;;<< << << << << << << << << << << << << << << << << << << << << << << << << << ;; arithmetic shift left |