diff options
author | Georg-Johann Lay <avr@gjlay.de> | 2025-01-22 21:11:22 +0100 |
---|---|---|
committer | Georg-Johann Lay <avr@gjlay.de> | 2025-01-23 10:13:42 +0100 |
commit | f30edd17e62e9474f90785a5915959cd6d8c3f62 (patch) | |
tree | b9fe8cf33cbb603a2b19b7b7beca1702d204e187 /gcc/config | |
parent | b3f51ea894947e495baffc67407647a3b25acdd5 (diff) | |
download | gcc-f30edd17e62e9474f90785a5915959cd6d8c3f62.zip gcc-f30edd17e62e9474f90785a5915959cd6d8c3f62.tar.gz gcc-f30edd17e62e9474f90785a5915959cd6d8c3f62.tar.bz2 |
AVR: PR117726 - Tweak 32-bit logical shifts of 25...30 for -Oz.
As it turns out, logical 32-bit shifts with an offset of 25..30 can
be performed in 7 instructions or less. This beats the 7 instructions
required for the default code of a shift loop.
Plus, with zero overhead, these cases can be 3-operand.
This is only relevant for -Oz because with -Os, 3op shifts are
split with -msplit-bit-shift (which is not performed with -Oz).
PR target/117726
gcc/
* config/avr/avr.cc (avr_ld_regno_p): New function.
(ashlsi3_out) [case 25,26,27,28,29,30]: Handle and tweak.
(lshrsi3_out): Same.
(avr_rtx_costs_1) [SImode, ASHIFT, LSHIFTRT]: Adjust costs.
* config/avr/avr.md (ashlsi3, *ashlsi3, *ashlsi3_const):
Add "r,r,C4L" alternative.
(lshrsi3, *lshrsi3, *lshrsi3_const): Add "r,r,C4R" alternative.
* config/avr/constraints.md (C4R, C4L): New.
gcc/testsuite/
* gcc.target/avr/torture/avr-torture.exp (AVR_TORTURE_OPTIONS):
Turn one option variant into -Oz.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/avr/avr.cc | 163 | ||||
-rw-r--r-- | gcc/config/avr/avr.md | 40 | ||||
-rw-r--r-- | gcc/config/avr/constraints.md | 9 |
3 files changed, 175 insertions, 37 deletions
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index e5a5aa3..8628a43 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -418,6 +418,15 @@ avr_adiw_reg_p (rtx reg) } +/* Return true iff REGNO is in R16...R31. */ + +static bool +avr_ld_regno_p (int regno) +{ + return TEST_HARD_REG_CLASS (LD_REGS, regno); +} + + static bool ra_in_progress () { @@ -7397,17 +7406,20 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen) { if (CONST_INT_P (operands[2])) { + int off = INTVAL (operands[2]); int reg0 = true_regnum (operands[0]); int reg1 = true_regnum (operands[1]); bool reg1_unused_after = reg_unused_after (insn, operands[1]); - + bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL + && XVECLEN (PATTERN (insn), 0) == 3 + && REG_P (operands[3])); if (plen) *plen = 0; - switch (INTVAL (operands[2])) + switch (off) { default: - if (INTVAL (operands[2]) < 32) + if (off < 32) break; return AVR_HAVE_MOVW @@ -7461,11 +7473,58 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen) "mov %D0,%B1" CR_TAB "clr %B0" CR_TAB "clr %A0", operands, plen, 4); + case 30: + if (AVR_HAVE_MUL && scratch_p) + return avr_asm_len ("ldi %3,1<<6" CR_TAB + "mul %3,%A1" CR_TAB + "mov %D0,r0" CR_TAB + "clr __zero_reg__" CR_TAB + "clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", operands, plen, 7); + // Fallthrough + + case 28: + case 29: + { + const bool ld_reg0_p = avr_ld_regno_p (reg0 + 3); // %D0 + const bool ld_reg1_p = avr_ld_regno_p (reg1 + 0); // %A1 + if (ld_reg0_p + || (ld_reg1_p && reg1_unused_after) + || scratch_p) + { + if (ld_reg0_p) + avr_asm_len ("mov %D0,%A1" CR_TAB + "swap %D0" CR_TAB + "andi %D0,0xf0", operands, plen, 3); + else if (ld_reg1_p && reg1_unused_after) + avr_asm_len ("swap %A1" CR_TAB + "andi %A1,0xf0" CR_TAB + "mov %D0,%A1", operands, plen, 3); + else + avr_asm_len ("mov %D0,%A1" CR_TAB + "swap %D0" CR_TAB + "ldi %3,0xf0" CR_TAB + "and %D0,%3", operands, plen, 4); + for (int i = 28; i < off; ++i) + avr_asm_len ("lsl %D0", operands, 
plen, 1); + return avr_asm_len ("clr %C0" CR_TAB + "clr %B0" CR_TAB + "clr %A0", operands, plen, 3); + } + } + // Fallthrough + case 24: - return avr_asm_len ("mov %D0,%A1" CR_TAB - "clr %C0" CR_TAB + case 25: + case 26: + case 27: + avr_asm_len ("mov %D0,%A1", operands, plen, 1); + for (int i = 24; i < off; ++i) + avr_asm_len ("lsl %D0", operands, plen, 1); + return avr_asm_len ("clr %C0" CR_TAB "clr %B0" CR_TAB - "clr %A0", operands, plen, 4); + "clr %A0", operands, plen, 3); case 31: return AVR_HAVE_MOVW ? avr_asm_len ("bst %A1,0" CR_TAB @@ -8298,17 +8357,20 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen) { if (CONST_INT_P (operands[2])) { + int off = INTVAL (operands[2]); int reg0 = true_regnum (operands[0]); int reg1 = true_regnum (operands[1]); bool reg1_unused_after = reg_unused_after (insn, operands[1]); - + bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL + && XVECLEN (PATTERN (insn), 0) == 3 + && REG_P (operands[3])); if (plen) *plen = 0; - switch (INTVAL (operands[2])) + switch (off) { default: - if (INTVAL (operands[2]) < 32) + if (off < 32) break; return AVR_HAVE_MOVW @@ -8362,11 +8424,58 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen) "mov %A0,%C1" CR_TAB "clr %C0" CR_TAB "clr %D0", operands, plen, 4); + case 30: + if (AVR_HAVE_MUL && scratch_p) + return avr_asm_len ("ldi %3,1<<2" CR_TAB + "mul %3,%D1" CR_TAB + "mov %A0,r1" CR_TAB + "clr __zero_reg__" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0", operands, plen, 7); + // Fallthrough + + case 29: + case 28: + { + const bool ld_reg0_p = avr_ld_regno_p (reg0 + 0); // %A0 + const bool ld_reg1_p = avr_ld_regno_p (reg1 + 3); // %D1 + if (ld_reg0_p + || (ld_reg1_p && reg1_unused_after) + || scratch_p) + { + if (ld_reg0_p) + avr_asm_len ("mov %A0,%D1" CR_TAB + "swap %A0" CR_TAB + "andi %A0,0x0f", operands, plen, 3); + else if (ld_reg1_p && reg1_unused_after) + avr_asm_len ("swap %D1" CR_TAB + "andi %D1,0x0f" CR_TAB + "mov %A0,%D1", operands, plen, 3); + else + 
avr_asm_len ("mov %A0,%D1" CR_TAB + "swap %A0" CR_TAB + "ldi %3,0x0f" CR_TAB + "and %A0,%3", operands, plen, 4); + for (int i = 28; i < off; ++i) + avr_asm_len ("lsr %A0", operands, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB + "clr %C0" CR_TAB + "clr %D0", operands, plen, 3); + } + } + // Fallthrough + + case 27: + case 26: + case 25: case 24: - return avr_asm_len ("mov %A0,%D1" CR_TAB - "clr %B0" CR_TAB + avr_asm_len ("mov %A0,%D1", operands, plen, 1); + for (int i = 24; i < off; ++i) + avr_asm_len ("lsr %A0", operands, plen, 1); + return avr_asm_len ("clr %B0" CR_TAB "clr %C0" CR_TAB - "clr %D0", operands, plen, 4); + "clr %D0", operands, plen, 3); case 31: return AVR_HAVE_MOVW ? avr_asm_len ("bst %D1,7" CR_TAB @@ -13037,9 +13146,6 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case 0: *total = 0; break; - case 24: - *total = COSTS_N_INSNS (3); - break; case 1: case 8: *total = COSTS_N_INSNS (4); @@ -13050,6 +13156,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case 16: *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW); break; + case 24: + case 25: + case 26: + case 27: + *total = COSTS_N_INSNS (4 + val1 - 24); + break; + case 28: + case 29: + *total = COSTS_N_INSNS (6 + val1 - 28); + break; + case 30: + *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8); + break; case 31: *total = COSTS_N_INSNS (6); break; @@ -13346,6 +13465,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, *total = 0; break; case 1: + case 8: *total = COSTS_N_INSNS (4); break; case 2: @@ -13357,9 +13477,18 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case 16: *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW); break; - case 8: case 24: - *total = COSTS_N_INSNS (4); + case 25: + case 26: + case 27: + *total = COSTS_N_INSNS (4 + val1 - 24); + break; + case 28: + case 29: + *total = COSTS_N_INSNS (6 + val1 - 28); + break; + case 30: + *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 
7 : 8); break; case 31: *total = COSTS_N_INSNS (6); diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 594940c..6550fad 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -5363,9 +5363,9 @@ ;; "ashlsq3" "ashlusq3" ;; "ashlsa3" "ashlusa3" (define_insn_and_split "ashl<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") - (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,n,Qm")))] + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4L,C4l,n,Qm")))] "" "#" "&& reload_completed" @@ -5377,9 +5377,9 @@ [(set_attr "isa" "*,*,*,3op,*,*")]) (define_insn "*ashl<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") - (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,n,Qm"))) + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4L,C4l,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { @@ -5564,10 +5564,10 @@ ;; "*ashlsq3_const" "*ashlusq3_const" ;; "*ashlsa3_const" "*ashlusa3_const" (define_insn "*ashl<mode>3_const" - [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") - (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") - (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,n"))) - (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d")) + [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LP,O C4L,C4l,n"))) + (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d 
,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { @@ -5955,9 +5955,9 @@ ;; "lshrsq3" "lshrusq3" ;; "lshrsa3" "lshrusa3" (define_insn_and_split "lshr<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") - (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,n,Qm")))] + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4R,C4r,n,Qm")))] "" "#" "&& reload_completed" @@ -5969,9 +5969,9 @@ [(set_attr "isa" "*,*,*,3op,*,*")]) (define_insn "*lshr<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") - (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,n,Qm"))) + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4R,C4r,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { @@ -6059,10 +6059,10 @@ ;; "*lshrsq3_const" "*lshrusq3_const" ;; "*lshrsa3_const" "*lshrusa3_const" (define_insn "*lshr<mode>3_const" - [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") - (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") - (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4r,n"))) - (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d")) + [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LP,O C4R,C4r,n"))) + (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md 
index fc8d4d5..2ca9cc3 100644 --- a/gcc/config/avr/constraints.md +++ b/gcc/config/avr/constraints.md @@ -328,6 +328,15 @@ (and (match_code "const_int") (match_test "avr_split_shift_p (4, ival, ASHIFT)"))) +(define_constraint "C4R" + "A constant integer shift offset for a 4-byte LSHIFTRT that's a 3-operand insn independent of options." + (and (match_code "const_int") + (match_test "ival == 15 || IN_RANGE (ival, 25, 31)"))) + +(define_constraint "C4L" + "A constant integer shift offset for a 4-byte ASHIFT that's a 3-operand insn independent of options." + (and (match_code "const_int") + (match_test "ival == 15 || IN_RANGE (ival, 25, 31)"))) ;; CONST_FIXED is no element of 'n' so cook our own. ;; "i" or "s" would match but because the insn uses iterators that cover |