diff options
author | Georg-Johann Lay <avr@gjlay.de> | 2025-01-11 14:10:29 +0100 |
---|---|---|
committer | Georg-Johann Lay <avr@gjlay.de> | 2025-01-23 17:38:05 +0100 |
commit | 0bb3223097e5ced4f9a13d18c6c65f2a9496437e (patch) | |
tree | f290432aa22214479aecbdb3c6989f734ffc1bdc /gcc/config | |
parent | 2d55c0161562f96d2230cd132b494a5d06352a23 (diff) | |
download | gcc-0bb3223097e5ced4f9a13d18c6c65f2a9496437e.zip gcc-0bb3223097e5ced4f9a13d18c6c65f2a9496437e.tar.gz gcc-0bb3223097e5ced4f9a13d18c6c65f2a9496437e.tar.bz2 |
AVR: PR118012 - Try to work around sick code from match.pd.
This patch tries to work around PR118012, where the compiler may emit a
full-fledged multiplication instead of a simple bit test.
This is because match.pd's
/* (zero_one == 0) ? y : z <op> y -> ((typeof(y))zero_one * z) <op> y */
/* (zero_one != 0) ? z <op> y : y -> ((typeof(y))zero_one * z) <op> y */
"optimizes" code with op in { plus, ior, xor } like
if (a & 1)
b = b <op> c;
to something like:
x1 = EXTRACT_BIT0 (a);
x2 = c MULT x1;
b = b <op> x2;
or
x1 = EXTRACT_BIT0 (a);
x2 = ZERO_EXTEND (x1);
x3 = NEG x2;
x4 = c AND x3;
b = b <op> x4;
which is very expensive and may even result in a libgcc call for
a 32-bit multiplication on devices that don't even have MUL.
Notice that EXTRACT_BIT0 is already more expensive (slower, more
code, more register pressure) than a bit-test + branch.
The patch:
o Adds some combiner patterns that try to map sick code back
to a bit test + branch.
o Adjusts costs to make MULT (x AND 1) cheap, in the hope that the
middle-end will use that alternative (which we map to sane code).
o On devices without MUL, 32-bit multiplication was performed by a
library call, which bypasses the MULT (x AND 1) and similar patterns.
Therefore, mulsi3 is also allowed for devices without MUL so that
we get a MULT pattern that can be transformed. (Though this is
not possible on AVR_TINY since it passes arguments on the stack).
o Adds a new command line option -mpr118012, so that most of the patterns
and cost computations can be switched off, as they have
avropt_pr118012 in their insn condition.
o Adds sign-extract.0 patterns unconditionally (not guarded by avropt_pr118012).
Notice that this patch is just a work-around; it does not fix the
root cause, namely the patterns in match.pd that care neither about
the target nor about costs.
The work-around is incomplete, and 3 of the new tests are still failing.
This is because there are situations where it does not work:
* The MULT is realized as a library call.
* The MULT is realized as an ASHIFT, and the ASHIFT again is transformed
into something else. For example, with -O2 -mmcu=atmega128,
ASHIFT(3) is transformed into ASHIFT(1) + ASHIFT(2).
PR tree-optimization/118012
PR tree-optimization/118360
gcc/
* config/avr/avr.opt (-mpr118012): New undocumented option.
* config/avr/avr-protos.h (avr_out_sextr)
(avr_emit_skip_pixop, avr_emit_skip_clear): New protos.
* config/avr/avr.cc (avr_adjust_insn_length)
[case ADJUST_LEN_SEXTR]: Handle case.
(avr_rtx_costs_1) [NEG]: Costs for NEG (ZERO_EXTEND (ZERO_EXTRACT)).
[MULT && avropt_pr118012]: Costs for MULT (x AND 1).
(avr_out_sextr, avr_emit_skip_pixop, avr_emit_skip_clear): New
functions.
* config/avr/avr.md [avropt_pr118012]: Add combine patterns with
that condition that try to work around PR118012.
(adjust_len) <sextr>: Add insn attr value.
(pixop): New code iterator.
(mulsi3) [avropt_pr118012 && !AVR_TINY]: Allow these in insn condition.
gcc/testsuite/
* gcc.target/avr/mmcu/pr118012-1.h: New file.
* gcc.target/avr/mmcu/pr118012-1-o2-m128.c: New test.
* gcc.target/avr/mmcu/pr118012-1-os-m128.c: New test.
* gcc.target/avr/mmcu/pr118012-1-o2-m103.c: New test.
* gcc.target/avr/mmcu/pr118012-1-os-m103.c: New test.
* gcc.target/avr/mmcu/pr118012-1-o2-t40.c: New test.
* gcc.target/avr/mmcu/pr118012-1-os-t40.c: New test.
* gcc.target/avr/mmcu/pr118360-1.h: New file.
* gcc.target/avr/mmcu/pr118360-1-o2-m128.c: New test.
* gcc.target/avr/mmcu/pr118360-1-os-m128.c: New test.
* gcc.target/avr/mmcu/pr118360-1-o2-m103.c: New test.
* gcc.target/avr/mmcu/pr118360-1-os-m103.c: New test.
* gcc.target/avr/mmcu/pr118360-1-o2-t40.c: New test.
* gcc.target/avr/mmcu/pr118360-1-os-t40.c: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/avr/avr-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/avr/avr.cc | 173 | ||||
-rw-r--r-- | gcc/config/avr/avr.md | 390 | ||||
-rw-r--r-- | gcc/config/avr/avr.opt | 5 |
4 files changed, 564 insertions, 7 deletions
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 8a1d1d3..83137c7 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -69,6 +69,7 @@ extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*); extern const char *avr_out_insv (rtx_insn *, rtx*, int*); extern const char *avr_out_extr (rtx_insn *, rtx*, int*); extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*); +extern const char *avr_out_sextr (rtx_insn *, rtx*, int*); extern const char *avr_out_plus_set_ZN (rtx*, int*); extern const char *avr_out_plus_set_N (rtx*, int*); extern const char *avr_out_op8_set_ZN (rtx_code, rtx*, int*); @@ -102,6 +103,8 @@ extern void avr_expand_prologue (void); extern void avr_expand_epilogue (bool); extern bool avr_emit_cpymemhi (rtx*); extern void avr_emit_xior_with_shift (rtx_insn*, rtx*, int); +extern void avr_emit_skip_pixop (rtx_code, rtx, rtx, rtx, rtx_code, rtx, int); +extern void avr_emit_skip_clear (rtx, rtx, rtx_code, rtx, int); extern bool avr_epilogue_uses (int regno); extern void avr_output_addr_vec (rtx_insn*, rtx); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 8628a43..656d3e7 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -11076,6 +11076,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len) case ADJUST_LEN_XLOAD: avr_out_xload (insn, op, &len); break; case ADJUST_LEN_FLOAD: avr_out_fload (insn, op, &len); break; case ADJUST_LEN_SEXT: avr_out_sign_extend (insn, op, &len); break; + case ADJUST_LEN_SEXTR: avr_out_sextr (insn, op, &len); break; case ADJUST_LEN_SFRACT: avr_out_fract (insn, op, true, &len); break; case ADJUST_LEN_UFRACT: avr_out_fract (insn, op, false, &len); break; @@ -12560,6 +12561,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, ? 
INTVAL (XEXP (x, 1)) : -1; + if (avropt_pr118012) + { + if ((code == IOR || code == XOR || code == PLUS) + && GET_CODE (XEXP (x, 0)) == ASHIFT + && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == AND + && XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 1) == const1_rtx) + { + *total = COSTS_N_INSNS (2 + n_bytes); + return true; + } + } + switch (code) { case CONST_INT: @@ -12577,6 +12591,20 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, return true; case NEG: + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT) + { + // Just a sign_extract of bit 0? + rtx y = XEXP (XEXP (x, 0), 0); + if (XEXP (y, 1) == const1_rtx + && XEXP (y, 2) == const0_rtx) + { + *total = COSTS_N_INSNS (1 + n_bytes + - (AVR_HAVE_MOVW && n_bytes == 4)); + return true; + } + } + switch (mode) { case E_QImode: @@ -12856,6 +12884,25 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, return true; case MULT: + if (avropt_pr118012) + { + if (GET_CODE (XEXP (x, 0)) == AND + && XEXP (XEXP (x, 0), 1) == const1_rtx) + { + // Try to defeat PR118012. The MUL variant is actually very + // expensive, but combine is given a pattern to transform this + // into something less toxic. Though this might not work + // for SImode, and we still have a completely ridiculous + // 32-bit multiplication instead of a simple bit test on + // devices that don't even have MUL. This is because on + // AVR_TINY, we'll get a libcall which we cannot undo. + // (On other devices that don't have MUL, the libcall is + // bypassed by providing mulsi3, cf. insn mulsi3_[call_]pr118012. + *total = 0; + return true; + } + } // PR118012 + switch (mode) { case E_QImode: @@ -14383,6 +14430,132 @@ avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]) } +/* Output code for XOP[0] = sign_extract (XOP[1].0) and return "". + PLEN == 0: Output instructions. + PLEN != 0: Set *PLEN to the length of the sequence in words. 
*/ + +const char * +avr_out_sextr (rtx_insn *insn, rtx *xop, int *plen) +{ + rtx dest = xop[0]; + rtx src = xop[1]; + int bit = INTVAL (xop[2]); + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + + gcc_assert (bit == 0); + + if (reg_unused_after (insn, src)) + avr_asm_len ("lsr %1", xop, plen, -1); + else + avr_asm_len ("mov %0,%1" CR_TAB + "lsr %0", xop, plen, -2); + + for (int i = 0; i < n_bytes; ++i) + { + rtx b = avr_byte (dest, i); + avr_asm_len ("sbc %0,%0", &b, plen, 1); + if (i == 1 && n_bytes == 4 && AVR_HAVE_MOVW) + return avr_asm_len ("movw %C0,%A0", xop, plen, 1); + } + + return ""; +} + + +/* + if (bits.bitno <eqne> 0) + dest = op0; + else + dest = op0 <pix> op1; + + Performed as: + + dest = op0; + if (bits.bitno <eqne> 0) + goto LL; + dest o= op1; +LL:; */ + +void +avr_emit_skip_pixop (rtx_code pix, rtx dest, rtx op0, rtx op1, + rtx_code eqne, rtx bits, int bitno) +{ + gcc_assert (eqne == EQ); + + const machine_mode mode = GET_MODE (dest); + + // Get rid of early-clobbers. + + if (reg_overlap_mentioned_p (dest, bits)) + bits = copy_to_mode_reg (GET_MODE (bits), bits); + + if (reg_overlap_mentioned_p (dest, op1)) + op1 = copy_to_mode_reg (mode, op1); + + // xorqi3 has "register_operand" for op1. + if (mode == QImode && pix == XOR) + op1 = force_reg (QImode, op1); + + emit_move_insn (dest, op0); + + // Skip if bits.bitno <eqne> bitno. + rtx xlabel = gen_label_rtx (); + rtx zerox = gen_rtx_ZERO_EXTRACT (QImode, bits, const1_rtx, GEN_INT (bitno)); + rtx cond = gen_rtx_fmt_ee (eqne, VOIDmode, zerox, const0_rtx); + emit (gen_sbrx_branchqi_split (cond, bits, const0_rtx, xlabel)); + + // Payload: plus, ior, xor for HI, PSI, SI have a scratch:QI; + // QI and plus:HI don't. + rtx src = gen_rtx_fmt_ee (pix, mode, dest, op1); + rtx set = gen_rtx_SET (dest, src); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (QImode)); + bool no_scratch = mode == QImode || (mode == HImode && pix == PLUS); + emit (no_scratch + ? 
set + : gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); + + emit_label (xlabel); +} + + +/* + if (bits.bitno <eqne> 0) + dest = src; + else + dest = 0; + + Performed as: + + dest = src; + if (bits.bitno <eqne> 0) + goto LL; + dest = 0; +LL:; */ + +void +avr_emit_skip_clear (rtx dest, rtx src, rtx_code eqne, rtx bits, int bitno) +{ + const machine_mode mode = GET_MODE (dest); + + // Get rid of early-clobber. + if (reg_overlap_mentioned_p (dest, bits)) + bits = copy_to_mode_reg (GET_MODE (bits), bits); + + emit_move_insn (dest, src); + + // Skip if bits.bitno <eqne> bitno. + rtx xlabel = gen_label_rtx (); + rtx zerox = gen_rtx_ZERO_EXTRACT (QImode, bits, const1_rtx, GEN_INT (bitno)); + rtx cond = gen_rtx_fmt_ee (eqne, VOIDmode, zerox, const0_rtx); + emit (gen_sbrx_branchqi_split (cond, bits, const0_rtx, xlabel)); + + // Payload: dest = 0; + emit_move_insn (dest, CONST0_RTX (mode)); + + emit_label (xlabel); +} + + /* Worker function for `TARGET_ASM_CONSTRUCTOR'. */ static void diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 6550fad..1c95611 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -160,7 +160,7 @@ ;; Otherwise do special processing depending on the attribute. 
(define_attr "adjust_len" - "out_bitop, plus, addto_sp, sext, extr, extr_not, plus_ext, + "out_bitop, plus, addto_sp, sext, extr, extr_not, plus_ext, sextr, tsthi, tstpsi, tstsi, compare, compare64, call, mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, ufract, sfract, round, @@ -336,6 +336,7 @@ (define_code_iterator any_lshift [lshiftrt ashift]) ; logic shift (define_code_iterator piaop [plus ior and]) +(define_code_iterator pixop [plus ior xor]) (define_code_iterator bitop [xor ior and]) (define_code_iterator xior [xor ior]) (define_code_iterator eqne [eq ne]) @@ -3337,8 +3338,18 @@ (match_operand:SI 2 "nonmemory_operand" ""))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL" + "AVR_HAVE_MUL + || (avropt_pr118012 + /* AVR_TINY passes args on the stack, so we cannot work + around PR118012 like this. */ + && ! AVR_TINY)" { + if (! AVR_HAVE_MUL) + { + emit (gen_gen_mulsi3_pr118012 (operands[0], operands[1], operands[2])); + DONE; + } + if (u16_operand (operands[2], SImode)) { operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); @@ -3358,6 +3369,26 @@ DONE; }) +;; With PR118012, we do __mulsi3 as a transparent call, so insn combine +;; can transform (mult:SI (and:SI * (const_int 1))) into something +;; less toxic. +(define_expand "gen_mulsi3_pr118012" + [(parallel [(set (match_operand:SI 0 "register_operand") + (mult:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand"))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:DI 18))])] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! 
AVR_TINY" + { + operands[2] = force_reg (SImode, operands[2]); + if (avr_emit3_fix_outputs (gen_gen_mulsi3_pr118012, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26) | regmask (HImode, 30))) + DONE; + }) + (define_insn_and_split "*mulsi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") @@ -3393,6 +3424,33 @@ } }) +(define_insn_and_split "*mulsi3_pr118012" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") + (match_operand:SI 2 "pseudo_register_operand" "r"))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:DI 18))] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! AVR_TINY + && ! reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:SI 18) + (match_dup 1)) + (set (reg:SI 22) + (match_dup 2)) + (parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) + (reg:SI 22))]) + + ;; "muluqisi3" ;; "muluhisi3" (define_expand "mulu<mode>si3" @@ -3658,6 +3716,26 @@ (clobber (reg:HI 26)) (clobber (reg:CC REG_CC))])]) +(define_insn_and_split "*mulsi3_call_pr118012_split" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! 
AVR_TINY" + "#" + "&& reload_completed" + [(parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:CC REG_CC))])]) + (define_insn "*mulsi3_call" [(set (reg:SI 22) (mult:SI (reg:SI 22) @@ -3668,6 +3746,21 @@ "%~call __mulsi3" [(set_attr "type" "xcall")]) +(define_insn "*mulsi3_call_pr118012" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:CC REG_CC))] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! AVR_TINY + && reload_completed" + "%~call __mulsi3" + [(set_attr "type" "xcall")]) + ;; "*mulhisi3_call" ;; "*umulhisi3_call" (define_insn_and_split "*<extend_u>mulhisi3_call_split" @@ -7508,7 +7601,7 @@ ;; Combine will create zero-extract patterns for single-bit tests. ;; Permit any mode in source pattern by using VOIDmode. -(define_insn_and_split "*sbrx_branch<mode>_split" +(define_insn_and_split "sbrx_branch<mode>_split" [(set (pc) (if_then_else (match_operator 0 "eqne_operator" @@ -8975,8 +9068,8 @@ [(set (pc) (if_then_else (ge (match_operand:QI 0 "register_operand" "") (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc)))] + (label_ref (match_operand 1 "" "")) + (pc)))] "" "#" "reload_completed" @@ -10307,9 +10400,9 @@ (define_insn_and_split "*extzv.qihi1" [(set (match_operand:HI 0 "register_operand" "=r") - (zero_extract:HI (match_operand:QI 1 "register_operand" "r") + (zero_extract:HI (match_operand:QIHI 1 "register_operand" "r") (const_int 1) - (match_operand:QI 2 "const_0_to_7_operand" "n")))] + (match_operand:QI 2 "const_0_to_<MSB>_operand" "n")))] "" "#" "" @@ -10532,6 +10625,289 @@ (match_dup 2)))]) +(define_insn_and_split "*sextr.<QISI:mode>.<QISI2:mode>_split" + [(set (match_operand:QISI 0 "register_operand" "=r") + (sign_extract:QISI (match_operand:QISI2 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const0_operand" "L")))] + "" + "#" + 
"&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extract:QISI (match_dup 1) + (const_int 1) + (match_dup 2))) + (clobber (reg:CC REG_CC))])]) + +(define_insn "*sextr.<QISI:mode>.<QISI2:mode>" + [(set (match_operand:QISI 0 "register_operand" "=r") + (sign_extract:QISI (match_operand:QISI2 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const0_operand" "L"))) + (clobber (reg:CC REG_CC))] + "reload_completed" + { + return avr_out_sextr (insn, operands, NULL); + } + [(set_attr "adjust_len" "sextr")]) + + +(define_insn_and_split "*neg.zextr-to-sextr.<HISI:mode>.<QISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (neg:HISI (zero_extend:HISI + (zero_extract:QIPSI (match_operand:QISI 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const0_operand")))))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(set (match_dup 0) + (sign_extract:HISI (match_dup 1) + (const_int 1) + (match_dup 2)))]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; PR118012: match.pd's +;; +;; /* (zero_one == 0) ? y : z <op> y -> ((typeof(y))zero_one * z) <op> y */ +;; /* (zero_one != 0) ? z <op> y : y -> ((typeof(y))zero_one * z) <op> y */ +;; +;; introduces a crazy "optimization" that transforms code like +;; +;; if (b & 1) +;; c ^= a; +;; to +;; +;; u = extract_bit0 (b); +;; v = zero_extend (u); +;; w = NEG v; +;; x = a AND w +;; c ^= x +;; +;; or even to +;; +;; u = extract_bit0 (b); +;; v = a MULT u +;; c ^= v +;; +;; even on machines that don't have MUL instructions or that +;; have to perform the multiplication by means of a libgcc call. +;; Try to fix that below. Notice that on AVR_TINY no MUL insn is +;; available since is is performed as a libgcc call from which we +;; cannot roll back. With !AVR_HAVR_MULMUL it's a transparent call +;; from avr.md so we can get rid of that at least. 
+ +;; Map +;; $0 = ((sign_extract ($1.0)) AND $3) <op> $4 +;; to +;; $0 = $4 +;; if ($1.0 == 0) +;; goto L +;; $0 <op>= $3 +;; L:; +(define_insn_and_split "*pixop-to-skip.<QISI:mode>" + [(set (match_operand:QISI 0 "register_operand") + (pixop:QISI (and:QISI (sign_extract:QISI (match_operand:QISI2 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const0_operand")) + (match_operand:QISI 3 "nonmemory_operand")) + (match_operand:QISI 4 "register_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<pixop:CODE>, operands[0], operands[4], operands[3], + EQ, operands[1], 0); + DONE; + }) + +;; Map +;; $0 = (($1 AND 1) MULT $2) o $3 +;; to +;; $0 = $3 +;; if ($1.0 == 0) +;; goto L +;; $0 o= $2 +;; L:; +(define_insn_and_split "*mul.and1-to-skip.<mode>" + [(set (match_operand:QISI 0 "register_operand") + (pixop:QISI (mult:QISI (and:QISI (match_operand:QISI 1 "register_operand") + (const_int 1)) + (match_operand:QISI 2 "nonmemory_operand")) + (match_operand:QISI 3 "register_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<CODE>, operands[0], operands[3], operands[2], + EQ, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*mul.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (pixop:HISI (mult:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:HISI 2 "nonmemory_operand")) + (match_operand:HISI 3 "register_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<pixop:CODE>, operands[0], operands[3], operands[2], + EQ, operands[1], 0); + DONE; + }) + +;; Like the one above, but where $2 was a power of 2 and MULT has been +;; transformed to ASHIFT (PR118360). 
+(define_insn_and_split "*shl.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (pixop:HISI (ashift:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:QI 2 "const_int_operand")) + (match_operand:HISI 3 "register_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + rtx op2 = gen_int_mode (1u << INTVAL (operands[2]), <HISI:MODE>mode); + avr_emit_skip_pixop (<pixop:CODE>, operands[0], operands[3], op2, + EQ, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*shl.and-to-skip.<mode>" + [(set (match_operand:HISI 0 "register_operand") + (pixop:HISI (and:HISI (ashift:HISI (match_operand:HISI 1 "register_operand") + (match_operand:QI 4 "const_0_to_<MSB>_operand")) + (match_operand:HISI 2 "single_one_operand")) + (match_operand:HISI 3 "register_operand")))] + "avropt_pr118012 + && exact_log2 (UINTVAL (operands[2]) & GET_MODE_MASK (<MODE>mode)) + == INTVAL (operands[4]) + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<CODE>, operands[0], operands[3], operands[2], + EQ, operands[1], 0); + DONE; + }) + + +;; Map +;; $0 = ($1 AND 1) MULT $2 +;; to +;; $0 = $2 +;; if ($1.0 != 0) +;; goto L +;; $0 = 0 +;; L:; +(define_insn_and_split "*map.mul.and1-to-skip.<QISI:mode>" + [(set (match_operand:QISI 0 "register_operand") + (mult:QISI (and:QISI (match_operand:QISI2 1 "register_operand") + (const_int 1)) + (match_operand:QISI 2 "nonmemory_operand")))] + "avropt_pr118012 + && ! 
reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[2], NE, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*map.mul.and1-to-skip.<mode>" + [(set (match_operand:QISI 0 "register_operand") + (mult:QISI (and:QISI (match_operand:QISI 1 "register_operand") + (const_int 1)) + (match_operand:QISI 2 "nonmemory_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[2], NE, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*map.mul.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (mult:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:HISI 2 "nonmemory_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[2], NE, operands[1], 0); + DONE; + }) + +;; Similar, but the MULT has been turned to ASHIFT. +(define_insn_and_split "*map.shl.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (ashift:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:QI 2 "const_0_to_<HISI:MSB>_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! 
reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + rtx op2 = gen_int_mode (1u << INTVAL (operands[2]), <HISI:MODE>mode); + avr_emit_skip_clear (operands[0], op2, NE, operands[1], 0); + DONE; + }) + + +;; Map +;; $0 = sign_extract($1.0) AND $3 +;; to +;; $0 = $3 +;; if ($1.0 != 0) +;; goto L +;; $0 = 0 +;; L:; +(define_insn_and_split "*map.and1-to-skip.<QISI:mode>" + [(set (match_operand:QISI 0 "register_operand") + (and:QISI (sign_extract:QISI (match_operand:QISI2 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const0_operand")) + (match_operand:QISI 3 "nonmemory_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[3], NE, operands[1], 0); + DONE; + }) + + ;; Work around PR115307: Early passes expand isinf/f/l to a bloat. ;; These passes do not consider costs, and there is no way to ;; hook in or otherwise disable the generated bloat. diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt index 8eb9b3d..ce6a8db 100644 --- a/gcc/config/avr/avr.opt +++ b/gcc/config/avr/avr.opt @@ -52,6 +52,11 @@ Target Undocumented Mask(ALL_DEBUG) mlog= Target RejectNegative Joined Undocumented Var(avropt_log_details) +;; Tries to work around PR118012. +mpr118012 +Target Var(avropt_pr118012) UInteger Init(1) Undocumented +This option is on per default in order to work around PR118012. + mshort-calls Target RejectNegative Mask(SHORT_CALLS) This option is used internally for multilib generation and selection. Assume RJMP / RCALL can target all program memory. |