aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorGeorg-Johann Lay <avr@gjlay.de>2025-01-22 21:11:22 +0100
committerGeorg-Johann Lay <avr@gjlay.de>2025-01-23 10:13:42 +0100
commitf30edd17e62e9474f90785a5915959cd6d8c3f62 (patch)
treeb9fe8cf33cbb603a2b19b7b7beca1702d204e187 /gcc/config
parentb3f51ea894947e495baffc67407647a3b25acdd5 (diff)
downloadgcc-f30edd17e62e9474f90785a5915959cd6d8c3f62.zip
gcc-f30edd17e62e9474f90785a5915959cd6d8c3f62.tar.gz
gcc-f30edd17e62e9474f90785a5915959cd6d8c3f62.tar.bz2
AVR: PR117726 - Tweak 32-bit logical shifts of 25...30 for -Oz.
As it turns out, logical 32-bit shifts with an offset of 25..30 can be performed in 7 instructions or less. This beats the 7 instructions required for the default code of a shift loop. Plus, with zero overhead, these cases can be 3-operand. This is only relevant for -Oz because with -Os, 3op shifts are split with -msplit-bit-shift (which is not performed with -Oz). PR target/117726 gcc/ * config/avr/avr.cc (avr_ld_regno_p): New function. (ashlsi3_out) [case 25,26,27,28,29,30]: Handle and tweak. (lshrsi3_out): Same. (avr_rtx_costs_1) [SImode, ASHIFT, LSHIFTRT]: Adjust costs. * config/avr/avr.md (ashlsi3, *ashlsi3, *ashlsi3_const): Add "r,r,C4L" alternative. (lshrsi3, *lshrsi3, *lshrsi3_const): Add "r,r,C4R" alternative. * config/avr/constraints.md (C4R, C4L): New. gcc/testsuite/ * gcc.target/avr/torture/avr-torture.exp (AVR_TORTURE_OPTIONS): Turn one option variant into -Oz.
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/avr/avr.cc163
-rw-r--r--gcc/config/avr/avr.md40
-rw-r--r--gcc/config/avr/constraints.md9
3 files changed, 175 insertions, 37 deletions
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index e5a5aa3..8628a43 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -418,6 +418,15 @@ avr_adiw_reg_p (rtx reg)
}
+/* Return true iff hard register number REGNO is in class LD_REGS, i.e. R16...R31. */
+
+static bool
+avr_ld_regno_p (int regno)
+{
+ return TEST_HARD_REG_CLASS (LD_REGS, regno);
+}
+
+
static bool
ra_in_progress ()
{
@@ -7397,17 +7406,20 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
{
if (CONST_INT_P (operands[2]))
{
+ int off = INTVAL (operands[2]);
int reg0 = true_regnum (operands[0]);
int reg1 = true_regnum (operands[1]);
bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+ bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+ && XVECLEN (PATTERN (insn), 0) == 3
+ && REG_P (operands[3]));
if (plen)
*plen = 0;
- switch (INTVAL (operands[2]))
+ switch (off)
{
default:
- if (INTVAL (operands[2]) < 32)
+ if (off < 32)
break;
return AVR_HAVE_MOVW
@@ -7461,11 +7473,58 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
"mov %D0,%B1" CR_TAB
"clr %B0" CR_TAB
"clr %A0", operands, plen, 4);
+ case 30:
+ if (AVR_HAVE_MUL && scratch_p)
+ return avr_asm_len ("ldi %3,1<<6" CR_TAB
+ "mul %3,%A1" CR_TAB
+ "mov %D0,r0" CR_TAB
+ "clr __zero_reg__" CR_TAB
+ "clr %C0" CR_TAB
+ "clr %B0" CR_TAB
+ "clr %A0", operands, plen, 7);
+ // Fallthrough
+
+ case 28:
+ case 29:
+ {
+ const bool ld_reg0_p = avr_ld_regno_p (reg0 + 3); // %D0
+ const bool ld_reg1_p = avr_ld_regno_p (reg1 + 0); // %A1
+ if (ld_reg0_p
+ || (ld_reg1_p && reg1_unused_after)
+ || scratch_p)
+ {
+ if (ld_reg0_p)
+ avr_asm_len ("mov %D0,%A1" CR_TAB
+ "swap %D0" CR_TAB
+ "andi %D0,0xf0", operands, plen, 3);
+ else if (ld_reg1_p && reg1_unused_after)
+ avr_asm_len ("swap %A1" CR_TAB
+ "andi %A1,0xf0" CR_TAB
+ "mov %D0,%A1", operands, plen, 3);
+ else
+ avr_asm_len ("mov %D0,%A1" CR_TAB
+ "swap %D0" CR_TAB
+ "ldi %3,0xf0" CR_TAB
+ "and %D0,%3", operands, plen, 4);
+ for (int i = 28; i < off; ++i)
+ avr_asm_len ("lsl %D0", operands, plen, 1);
+ return avr_asm_len ("clr %C0" CR_TAB
+ "clr %B0" CR_TAB
+ "clr %A0", operands, plen, 3);
+ }
+ }
+ // Fallthrough
+
case 24:
- return avr_asm_len ("mov %D0,%A1" CR_TAB
- "clr %C0" CR_TAB
+ case 25:
+ case 26:
+ case 27:
+ avr_asm_len ("mov %D0,%A1", operands, plen, 1);
+ for (int i = 24; i < off; ++i)
+ avr_asm_len ("lsl %D0", operands, plen, 1);
+ return avr_asm_len ("clr %C0" CR_TAB
"clr %B0" CR_TAB
- "clr %A0", operands, plen, 4);
+ "clr %A0", operands, plen, 3);
case 31:
return AVR_HAVE_MOVW
? avr_asm_len ("bst %A1,0" CR_TAB
@@ -8298,17 +8357,20 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
{
if (CONST_INT_P (operands[2]))
{
+ int off = INTVAL (operands[2]);
int reg0 = true_regnum (operands[0]);
int reg1 = true_regnum (operands[1]);
bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+ bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+ && XVECLEN (PATTERN (insn), 0) == 3
+ && REG_P (operands[3]));
if (plen)
*plen = 0;
- switch (INTVAL (operands[2]))
+ switch (off)
{
default:
- if (INTVAL (operands[2]) < 32)
+ if (off < 32)
break;
return AVR_HAVE_MOVW
@@ -8362,11 +8424,58 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
"mov %A0,%C1" CR_TAB
"clr %C0" CR_TAB
"clr %D0", operands, plen, 4);
+ case 30:
+ if (AVR_HAVE_MUL && scratch_p)
+ return avr_asm_len ("ldi %3,1<<2" CR_TAB
+ "mul %3,%D1" CR_TAB
+ "mov %A0,r1" CR_TAB
+ "clr __zero_reg__" CR_TAB
+ "clr %B0" CR_TAB
+ "clr %C0" CR_TAB
+ "clr %D0", operands, plen, 7);
+ // Fallthrough
+
+ case 29:
+ case 28:
+ {
+ const bool ld_reg0_p = avr_ld_regno_p (reg0 + 0); // %A0
+ const bool ld_reg1_p = avr_ld_regno_p (reg1 + 3); // %D1
+ if (ld_reg0_p
+ || (ld_reg1_p && reg1_unused_after)
+ || scratch_p)
+ {
+ if (ld_reg0_p)
+ avr_asm_len ("mov %A0,%D1" CR_TAB
+ "swap %A0" CR_TAB
+ "andi %A0,0x0f", operands, plen, 3);
+ else if (ld_reg1_p && reg1_unused_after)
+ avr_asm_len ("swap %D1" CR_TAB
+ "andi %D1,0x0f" CR_TAB
+ "mov %A0,%D1", operands, plen, 3);
+ else
+ avr_asm_len ("mov %A0,%D1" CR_TAB
+ "swap %A0" CR_TAB
+ "ldi %3,0x0f" CR_TAB
+ "and %A0,%3", operands, plen, 4);
+ for (int i = 28; i < off; ++i)
+ avr_asm_len ("lsr %A0", operands, plen, 1);
+ return avr_asm_len ("clr %B0" CR_TAB
+ "clr %C0" CR_TAB
+ "clr %D0", operands, plen, 3);
+ }
+ }
+ // Fallthrough
+
+ case 27:
+ case 26:
+ case 25:
case 24:
- return avr_asm_len ("mov %A0,%D1" CR_TAB
- "clr %B0" CR_TAB
+ avr_asm_len ("mov %A0,%D1", operands, plen, 1);
+ for (int i = 24; i < off; ++i)
+ avr_asm_len ("lsr %A0", operands, plen, 1);
+ return avr_asm_len ("clr %B0" CR_TAB
"clr %C0" CR_TAB
- "clr %D0", operands, plen, 4);
+ "clr %D0", operands, plen, 3);
case 31:
return AVR_HAVE_MOVW
? avr_asm_len ("bst %D1,7" CR_TAB
@@ -13037,9 +13146,6 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
case 0:
*total = 0;
break;
- case 24:
- *total = COSTS_N_INSNS (3);
- break;
case 1:
case 8:
*total = COSTS_N_INSNS (4);
@@ -13050,6 +13156,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
case 16:
*total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
break;
+ case 24:
+ case 25:
+ case 26:
+ case 27:
+ *total = COSTS_N_INSNS (4 + val1 - 24);
+ break;
+ case 28:
+ case 29:
+ *total = COSTS_N_INSNS (6 + val1 - 28);
+ break;
+ case 30:
+ *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
+ break;
case 31:
*total = COSTS_N_INSNS (6);
break;
@@ -13346,6 +13465,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
*total = 0;
break;
case 1:
+ case 8:
*total = COSTS_N_INSNS (4);
break;
case 2:
@@ -13357,9 +13477,18 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
case 16:
*total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
break;
- case 8:
case 24:
- *total = COSTS_N_INSNS (4);
+ case 25:
+ case 26:
+ case 27:
+ *total = COSTS_N_INSNS (4 + val1 - 24);
+ break;
+ case 28:
+ case 29:
+ *total = COSTS_N_INSNS (6 + val1 - 28);
+ break;
+ case 30:
+ *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
break;
case 31:
*total = COSTS_N_INSNS (6);
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 594940c..6550fad 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -5363,9 +5363,9 @@
;; "ashlsq3" "ashlusq3"
;; "ashlsa3" "ashlusa3"
(define_insn_and_split "ashl<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,n,Qm")))]
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4L,C4l,n,Qm")))]
""
"#"
"&& reload_completed"
@@ -5377,9 +5377,9 @@
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*ashl<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,n,Qm")))
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4L,C4l,n,Qm")))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
@@ -5564,10 +5564,10 @@
;; "*ashlsq3_const" "*ashlusq3_const"
;; "*ashlsa3_const" "*ashlusa3_const"
(define_insn "*ashl<mode>3_const"
- [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r")
- (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0")
- (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,n")))
- (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d"))
+ [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r")
+ (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0")
+ (match_operand:QI 2 "const_int_operand" "LP,O C4L,C4l,n")))
+ (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
@@ -5955,9 +5955,9 @@
;; "lshrsq3" "lshrusq3"
;; "lshrsa3" "lshrusa3"
(define_insn_and_split "lshr<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
- (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,n,Qm")))]
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
+ (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4R,C4r,n,Qm")))]
""
"#"
"&& reload_completed"
@@ -5969,9 +5969,9 @@
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*lshr<mode>3"
- [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
- (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
- (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,n,Qm")))
+ [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r")
+ (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0")
+ (match_operand:QI 2 "nop_general_operand" "r,LPK,O C4R,C4r,n,Qm")))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
@@ -6059,10 +6059,10 @@
;; "*lshrsq3_const" "*lshrusq3_const"
;; "*lshrsa3_const" "*lshrusa3_const"
(define_insn "*lshr<mode>3_const"
- [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r")
- (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0")
- (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4r,n")))
- (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d"))
+ [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r")
+ (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0")
+ (match_operand:QI 2 "const_int_operand" "LP,O C4R,C4r,n")))
+ (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d"))
(clobber (reg:CC REG_CC))]
"reload_completed"
{
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
index fc8d4d5..2ca9cc3 100644
--- a/gcc/config/avr/constraints.md
+++ b/gcc/config/avr/constraints.md
@@ -328,6 +328,15 @@
(and (match_code "const_int")
(match_test "avr_split_shift_p (4, ival, ASHIFT)")))
+(define_constraint "C4R"
+ "A constant integer shift offset for a 4-byte LSHIFTRT that's a 3-operand insn independent of options."
+ (and (match_code "const_int")
+ (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
+
+(define_constraint "C4L"
+ "A constant integer shift offset for a 4-byte ASHIFT that's a 3-operand insn independent of options."
+ (and (match_code "const_int")
+ (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
;; CONST_FIXED is no element of 'n' so cook our own.
;; "i" or "s" would match but because the insn uses iterators that cover