From 590245153a345410c79f16df7c58660ff91df05e Mon Sep 17 00:00:00 2001 From: Georg-Johann Lay Date: Fri, 23 Sep 2011 17:02:10 +0000 Subject: re PR target/50447 ([avr] Better support of AND, OR, XOR and PLUS with constant integers for 16- and 32-bit values) PR target/50447 * config/avr/avr.md: (adjust_len): Add alternative "out_plus". (addsi3): Rewrite using QI scratch register. Adjust text peepholes using plus:SI. (*addsi3_zero_extend.hi): New insn. (*subsi3_zero_extend.hi): New insn. (*subhi3_zero_extend1): Set attribute "cc" to "set_czn". (*subsi3_zero_extend): Ditto. (subsi3): Change predicate #2 to register_operand. * config/avr/avr-protos.h (avr_out_plus): New prototype. * config/avr/avr.c (avr_out_plus_1): New static function. (avr_out_plus): New function. (adjust_insn_length): Handle ADJUST_LEN_OUT_PLUS. From-SVN: r179123 --- gcc/ChangeLog | 16 ++++ gcc/config/avr/avr-protos.h | 1 + gcc/config/avr/avr.c | 165 ++++++++++++++++++++++++++++++++++++++++ gcc/config/avr/avr.md | 178 ++++++++++++++++++++++++-------------------- 4 files changed, 280 insertions(+), 80 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dec03cf..b631bf2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2011-09-23 Georg-Johann Lay + + PR target/50447 + * config/avr/avr.md: (adjust_len): Add alternative "out_plus". + (addsi3): Rewrite using QI scratch register. Adjust text + peepholes using plus:SI. + (*addsi3_zero_extend.hi): New insn. + (*subsi3_zero_extend.hi): New insn. + (*subhi3_zero_extend1): Set attribute "cc" to "set_czn". + (*subsi3_zero_extend): Ditto. + (subsi3): Change predicate #2 to register_operand. + * config/avr/avr-protos.h (avr_out_plus): New prototype. + * config/avr/avr.c (avr_out_plus_1): New static function. + (avr_out_plus): New function. + (adjust_insn_length): Handle ADJUST_LEN_OUT_PLUS.
+ 2011-09-23 Jakub Jelinek * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): For diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 215fd83..c6673e4 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -82,6 +82,7 @@ extern void avr_output_bld (rtx operands[], int bit_nr); extern void avr_output_addr_vec_elt (FILE *stream, int value); extern const char *avr_out_sbxx_branch (rtx insn, rtx operands[]); extern const char* avr_out_bitop (rtx, rtx*, int*); +extern const char* avr_out_plus (rtx*, int*); extern bool avr_popcount_each_byte (rtx, int, int); extern int extra_constraint_Q (rtx x); diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index 9c8b43d..1682aa0 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -4503,6 +4503,167 @@ lshrsi3_out (rtx insn, rtx operands[], int *len) } +/* Output addition of register XOP[0] and compile time constant XOP[2]: + + XOP[0] = XOP[0] + XOP[2] + + This helper returns nothing. If PLEN == NULL, print assembler instructions to perform the + addition; otherwise, set *PLEN to the length of the instruction sequence (in + words) printed with PLEN == NULL. XOP[3] is an 8-bit scratch register. + CODE == PLUS: perform addition by using ADD instructions. + CODE == MINUS: perform addition by using SUB instructions on the negated constant. */ + +static void +avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code) +{ + /* MODE of the operation. */ + enum machine_mode mode = GET_MODE (xop[0]); + + /* Number of bytes to operate on. */ + int i, n_bytes = GET_MODE_SIZE (mode); + + /* Value (0..0xff) held in clobber register op[2] or -1 if unknown. Used to avoid reloading the scratch with a value it already holds. */ + int clobber_val = -1; + + /* op[0]: 8-bit destination register + op[1]: 8-bit const int + op[2]: 8-bit scratch register */ + rtx op[3]; + + /* Started the operation? Before starting the operation we may skip + adding 0. This is no longer true once the operation has started because + carry must be taken into account. */ + bool started = false; + + /* Value to add.
There are two ways to add VAL: R += VAL and R -= -VAL. */ + rtx xval = xop[2]; + + if (MINUS == code) + xval = gen_int_mode (-UINTVAL (xval), mode); + + op[2] = xop[3]; + + if (plen) + *plen = 0; + + for (i = 0; i < n_bytes; i++) + { + /* We operate byte-wise on the destination. */ + rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i); + rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i); + + /* 8-bit value to operate with this byte. */ + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + /* Registers R16..R31 can operate with immediate. */ + bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8); + + op[0] = reg8; + op[1] = GEN_INT (val8); + + if (!started && i % 2 == 0 + && test_hard_reg_class (ADDW_REGS, reg8)) + { + rtx xval16 = simplify_gen_subreg (HImode, xval, mode, i); + unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode); + + /* Registers R24, X, Y, Z can use ADIW/SBIW with constants < 64 + i.e. operate word-wise. A zero word emits nothing; either way both bytes of the word are consumed here. */ + + if (val16 < 64) + { + if (val16 != 0) + { + started = true; + avr_asm_len (code == PLUS ? "adiw %0,%1" : "sbiw %0,%1", + op, plen, 1); + } + + i++; + continue; + } + } + + if (val8 == 0) + { + if (started) + avr_asm_len (code == PLUS + ? "adc %0,__zero_reg__" : "sbc %0,__zero_reg__", + op, plen, 1); + continue; + } + + switch (code) + { + case PLUS: + + gcc_assert (plen != NULL || REG_P (op[2])); + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (started ? "adc %0,%2" : "add %0,%2", op, plen, 1); + + break; /* PLUS */ + + case MINUS: + + if (ld_reg_p) + avr_asm_len (started ? "sbci %0,%1" : "subi %0,%1", op, plen, 1); + else + { + gcc_assert (plen != NULL || REG_P (op[2])); + + if (clobber_val != (int) val8) + avr_asm_len ("ldi %2,%1", op, plen, 1); + clobber_val = (int) val8; + + avr_asm_len (started ?
"sbc %0,%2" : "sub %0,%2", op, plen, 1); + } + + break; /* MINUS */ + + default: + /* Unknown code */ + gcc_unreachable(); + } + + started = true; + + } /* for all sub-bytes */ +} + + +/* Output addition of register XOP[0] and compile time constant XOP[2]: + + XOP[0] = XOP[0] + XOP[2] + + and return "". If PLEN == NULL, print assembler instructions to perform the + addition; otherwise, set *PLEN to the length of the instruction sequence (in + words) printed with PLEN == NULL. XOP[3] is an 8-bit scratch register. */ + +const char* +avr_out_plus (rtx *xop, int *plen) +{ + int len_plus, len_minus; + + /* Work out if XOP[0] += XOP[2] is better or XOP[0] -= -XOP[2]. Both variants are sized first; on a tie the MINUS variant is used. */ + + avr_out_plus_1 (xop, &len_plus, PLUS); + avr_out_plus_1 (xop, &len_minus, MINUS); + + if (plen) + *plen = (len_minus <= len_plus) ? len_minus : len_plus; + else if (len_minus <= len_plus) + avr_out_plus_1 (xop, NULL, MINUS); + else + avr_out_plus_1 (xop, NULL, PLUS); + + return ""; +} + + /* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile time constant XOP[2]: @@ -4851,6 +5012,10 @@ adjust_insn_length (rtx insn, int len) avr_out_bitop (insn, op, &len); break; + case ADJUST_LEN_OUT_PLUS: + avr_out_plus (op, &len); + break; + default: gcc_unreachable(); } diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index c7a28c7..8e78ca6 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -136,7 +136,7 @@ ;; Otherwise do special processing depending on the attribute.
(define_attr "adjust_len" - "yes,no,reload_in32,out_bitop" + "yes,no,reload_in32,out_bitop,out_plus" (const_string "yes")) ;; Define mode iterators @@ -909,31 +909,46 @@ (set_attr "cc" "set_n,set_czn,set_czn,set_czn,set_n,set_n")]) (define_insn "addsi3" - [(set (match_operand:SI 0 "register_operand" "=r,!w,!w,d,r,r") - (plus:SI - (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0") - (match_operand:SI 2 "nonmemory_operand" "r,I,J,i,P,N")))] + [(set (match_operand:SI 0 "register_operand" "=r,!w,!w,d,l,l ,d,r") + (plus:SI (match_operand:SI 1 "register_operand" "%0,0 ,0 ,0,0,0 ,0,0") + (match_operand:SI 2 "nonmemory_operand" "r,I ,J ,s,P,N ,n,n"))) + (clobber (match_scratch:QI 3 "=X,X ,X ,X,X,X ,X,&d"))] "" - "@ - add %A0,%A2\;adc %B0,%B2\;adc %C0,%C2\;adc %D0,%D2 - adiw %0,%2\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ - sbiw %0,%n2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__ - subi %0,lo8(-(%2))\;sbci %B0,hi8(-(%2))\;sbci %C0,hlo8(-(%2))\;sbci %D0,hhi8(-(%2)) - sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__ - sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" - [(set_attr "length" "4,3,3,4,5,5") - (set_attr "cc" "set_n,set_n,set_czn,set_czn,set_n,set_n")]) + { + static const char * const asm_code[] = + { + "add %A0,%A2\;adc %B0,%B2\;adc %C0,%C2\;adc %D0,%D2", + "adiw %0,%2\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__", + "sbiw %0,%n2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__", + "subi %0,lo8(-(%2))\;sbci %B0,hi8(-(%2))\;sbci %C0,hlo8(-(%2))\;sbci %D0,hhi8(-(%2))", + "sec\;adc %A0,__zero_reg__\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__", + "sec\;sbc %A0,__zero_reg__\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + }; + + if (which_alternative >= (signed) (sizeof (asm_code) / sizeof (*asm_code))) + return avr_out_plus (operands, NULL); + + return asm_code [which_alternative]; + } + [(set_attr "length" "4,3,3,4,5,5,8,8") + (set_attr 
"adjust_len" "no,no,no,no,no,no,out_plus,out_plus") + (set_attr "cc" "set_n,set_n,set_czn,set_czn,set_n,set_n,clobber,clobber")]) (define_insn "*addsi3_zero_extend" - [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (zero_extend:SI - (match_operand:QI 1 "register_operand" "r")) - (match_operand:SI 2 "register_operand" "0")))] + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:QI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] "" - "add %A0,%1 - adc %B0,__zero_reg__ - adc %C0,__zero_reg__ - adc %D0,__zero_reg__" + "add %A0,%1\;adc %B0,__zero_reg__\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_n")]) + +(define_insn "*addsi3_zero_extend.hi" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "r")) + (match_operand:SI 2 "register_operand" "0")))] + "" + "add %A0,%1\;adc %B0,%B1\;adc %C0,__zero_reg__\;adc %D0,__zero_reg__" [(set_attr "length" "4") (set_attr "cc" "set_n")]) @@ -962,39 +977,40 @@ (set_attr "cc" "set_czn,set_czn")]) (define_insn "*subhi3_zero_extend1" - [(set (match_operand:HI 0 "register_operand" "=r") - (minus:HI (match_operand:HI 1 "register_operand" "0") - (zero_extend:HI - (match_operand:QI 2 "register_operand" "r"))))] + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))] "" - "sub %A0,%2 - sbc %B0,__zero_reg__" + "sub %A0,%2\;sbc %B0,__zero_reg__" [(set_attr "length" "2") - (set_attr "cc" "set_n")]) + (set_attr "cc" "set_czn")]) (define_insn "subsi3" - [(set (match_operand:SI 0 "register_operand" "=r,d") - (minus:SI (match_operand:SI 1 "register_operand" "0,0") - (match_operand:SI 2 "nonmemory_operand" "r,i")))] + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + 
(match_operand:SI 2 "register_operand" "r")))] "" - "@ - sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2 - subi %A0,lo8(%2)\;sbci %B0,hi8(%2)\;sbci %C0,hlo8(%2)\;sbci %D0,hhi8(%2)" - [(set_attr "length" "4,4") - (set_attr "cc" "set_czn,set_czn")]) + "sub %0,%2\;sbc %B0,%B2\;sbc %C0,%C2\;sbc %D0,%D2" + [(set_attr "length" "4") + (set_attr "cc" "set_czn")]) (define_insn "*subsi3_zero_extend" - [(set (match_operand:SI 0 "register_operand" "=r") - (minus:SI (match_operand:SI 1 "register_operand" "0") - (zero_extend:SI - (match_operand:QI 2 "register_operand" "r"))))] - "" - "sub %A0,%2 - sbc %B0,__zero_reg__ - sbc %C0,__zero_reg__ - sbc %D0,__zero_reg__" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI (match_operand:QI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,__zero_reg__\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" [(set_attr "length" "4") - (set_attr "cc" "set_n")]) + (set_attr "cc" "set_czn")]) + +(define_insn "*subsi3_zero_extend.hi" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (match_operand:SI 1 "register_operand" "0") + (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))] + "" + "sub %A0,%2\;sbc %B0,%B2\;sbc %C0,__zero_reg__\;sbc %D0,__zero_reg__" + [(set_attr "length" "4") + (set_attr "cc" "set_czn")]) ;****************************************************************************** ; mul @@ -4093,42 +4109,44 @@ ;; ************************* Peepholes ******************************** (define_peephole - [(set (match_operand:SI 0 "d_register_operand" "") - (plus:SI (match_dup 0) - (const_int -1))) - (parallel - [(set (cc0) - (compare (match_dup 0) - (const_int -1))) - (clobber (match_operand:QI 1 "d_register_operand" ""))]) + [(parallel [(set (match_operand:SI 0 "d_register_operand" "") + (plus:SI (match_dup 0) + (const_int -1))) + (clobber (scratch:QI))]) + (parallel [(set (cc0) + (compare (match_dup 0) + (const_int -1))) + (clobber 
(match_operand:QI 1 "d_register_operand" ""))]) (set (pc) - (if_then_else (ne (cc0) (const_int 0)) - (label_ref (match_operand 2 "" "")) - (pc)))] + (if_then_else (ne (cc0) + (const_int 0)) + (label_ref (match_operand 2 "" "")) + (pc)))] "" - "* -{ - CC_STATUS_INIT; - if (test_hard_reg_class (ADDW_REGS, operands[0])) - output_asm_insn (AS2 (sbiw,%0,1) CR_TAB - AS2 (sbc,%C0,__zero_reg__) CR_TAB - AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands); - else - output_asm_insn (AS2 (subi,%A0,1) CR_TAB - AS2 (sbc,%B0,__zero_reg__) CR_TAB - AS2 (sbc,%C0,__zero_reg__) CR_TAB - AS2 (sbc,%D0,__zero_reg__) \"\\n\", operands); - switch (avr_jump_mode (operands[2],insn)) { - case 1: - return AS1 (brcc,%2); - case 2: - return (AS1 (brcs,.+2) CR_TAB - AS1 (rjmp,%2)); - } - return (AS1 (brcs,.+4) CR_TAB - AS1 (jmp,%2)); -}") + CC_STATUS_INIT; + if (test_hard_reg_class (ADDW_REGS, operands[0])) + output_asm_insn (AS2 (sbiw,%0,1) CR_TAB + AS2 (sbc,%C0,__zero_reg__) CR_TAB + AS2 (sbc,%D0,__zero_reg__) "\n", operands); + else + output_asm_insn (AS2 (subi,%A0,1) CR_TAB + AS2 (sbc,%B0,__zero_reg__) CR_TAB + AS2 (sbc,%C0,__zero_reg__) CR_TAB + AS2 (sbc,%D0,__zero_reg__) "\n", operands); + + switch (avr_jump_mode (operands[2], insn)) + { + case 1: + return AS1 (brcc,%2); + case 2: + return (AS1 (brcs,.+2) CR_TAB + AS1 (rjmp,%2)); + } + + return (AS1 (brcs,.+4) CR_TAB + AS1 (jmp,%2)); + }) (define_peephole [(set (match_operand:HI 0 "d_register_operand" "") -- cgit v1.1