diff options
author | Georg-Johann Lay <avr@gjlay.de> | 2025-01-11 14:10:29 +0100 |
---|---|---|
committer | Georg-Johann Lay <avr@gjlay.de> | 2025-01-23 17:38:05 +0100 |
commit | 0bb3223097e5ced4f9a13d18c6c65f2a9496437e (patch) | |
tree | f290432aa22214479aecbdb3c6989f734ffc1bdc /gcc/config/avr/avr.md | |
parent | 2d55c0161562f96d2230cd132b494a5d06352a23 (diff) | |
download | gcc-0bb3223097e5ced4f9a13d18c6c65f2a9496437e.zip gcc-0bb3223097e5ced4f9a13d18c6c65f2a9496437e.tar.gz gcc-0bb3223097e5ced4f9a13d18c6c65f2a9496437e.tar.bz2 |
AVR: PR118012 - Try to work around sick code from match.pd.
This patch tries to work around PR118012 which may use a
full fledged multiplication instead of a simple bit test.
This is because match.pd's
/* (zero_one == 0) ? y : z <op> y -> ((typeof(y))zero_one * z) <op> y */
/* (zero_one != 0) ? z <op> y : y -> ((typeof(y))zero_one * z) <op> y */
"optimizes" code with op in { plus, ior, xor } like
if (a & 1)
b = b <op> c;
to something like:
x1 = EXTRACT_BIT0 (a);
x2 = c MULT x1;
b = b <op> x2;
or
x1 = EXTRACT_BIT0 (a);
x2 = ZERO_EXTEND (x1);
x3 = NEG x2;
x4 = c AND x3;
b = b <op> x4;
which is very expensive and may even result in a libgcc call for
a 32-bit multiplication on devices that don't even have MUL.
Notice that EXTRACT_BIT0 is already more expensive (slower, more
code, more register pressure) than a bit-test + branch.
The patch:
o Adds some combiner patterns that try to map sick code back
to a bit test + branch.
o Adjusts costs to make MULT (x AND 1) cheap, in the hope that the
middle-end will use that alternative (which we map to sane code).
o On devices without MUL, 32-bit multiplication was performed by a
library call, which bypasses the MULT (x AND 1) and similar patterns.
Therefore, mulsi3 is also allowed for devices without MUL so that
we get a MULT pattern that can be transformed. (Though this is
not possible on AVR_TINY since it passes arguments on the stack).
o Add a new command line option -mpr118012, so most of the patterns
and cost computations can be switched off as they have
avropt_pr118012 in their insn condition.
o Added sign-extract.0 patterns unconditionally (no avropt_pr118012).
Notice that this patch is just a work-around, it's not a fix of the
root cause, which are the patterns in match.pd that don't care about
the target and don't even care about costs.
The work-around is incomplete, and 3 of the new tests are still failing.
This is because there are situations where it does not work:
* The MULT is realized as a library call.
* The MULT is realized as an ASHIFT, and the ASHIFT again is transformed
into something else. For example, with -O2 -mmcu=atmega128,
ASHIFT(3) is transformed into ASHIFT(1) + ASHIFT(2).
PR tree-optimization/118012
PR tree-optimization/118360
gcc/
* config/avr/avr.opt (-mpr118012): New undocumented option.
* config/avr/avr-protos.h (avr_out_sextr)
(avr_emit_skip_pixop, avr_emit_skip_clear): New protos.
* config/avr/avr.cc (avr_adjust_insn_length)
[case ADJUST_LEN_SEXTR]: Handle case.
(avr_rtx_costs_1) [NEG]: Costs for NEG (ZERO_EXTEND (ZERO_EXTRACT)).
[MULT && avropt_pr118012]: Costs for MULT (x AND 1).
(avr_out_sextr, avr_emit_skip_pixop, avr_emit_skip_clear): New
functions.
* config/avr/avr.md [avropt_pr118012]: Add combine patterns with
that condition that try to work around PR118012.
(adjust_len) <sextr>: Add insn attr value.
(pixop): New code iterator.
(mulsi3) [avropt_pr118012 && !AVR_TINY]: Allow these in insn condition.
gcc/testsuite/
* gcc.target/avr/mmcu/pr118012-1.h: New file.
* gcc.target/avr/mmcu/pr118012-1-o2-m128.c: New test.
* gcc.target/avr/mmcu/pr118012-1-os-m128.c: New test.
* gcc.target/avr/mmcu/pr118012-1-o2-m103.c: New test.
* gcc.target/avr/mmcu/pr118012-1-os-m103.c: New test.
* gcc.target/avr/mmcu/pr118012-1-o2-t40.c: New test.
* gcc.target/avr/mmcu/pr118012-1-os-t40.c: New test.
* gcc.target/avr/mmcu/pr118360-1.h: New file.
* gcc.target/avr/mmcu/pr118360-1-o2-m128.c: New test.
* gcc.target/avr/mmcu/pr118360-1-os-m128.c: New test.
* gcc.target/avr/mmcu/pr118360-1-o2-m103.c: New test.
* gcc.target/avr/mmcu/pr118360-1-os-m103.c: New test.
* gcc.target/avr/mmcu/pr118360-1-o2-t40.c: New test.
* gcc.target/avr/mmcu/pr118360-1-os-t40.c: New test.
Diffstat (limited to 'gcc/config/avr/avr.md')
-rw-r--r-- | gcc/config/avr/avr.md | 390 |
1 files changed, 383 insertions, 7 deletions
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 6550fad..1c95611 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -160,7 +160,7 @@ ;; Otherwise do special processing depending on the attribute. (define_attr "adjust_len" - "out_bitop, plus, addto_sp, sext, extr, extr_not, plus_ext, + "out_bitop, plus, addto_sp, sext, extr, extr_not, plus_ext, sextr, tsthi, tstpsi, tstsi, compare, compare64, call, mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, ufract, sfract, round, @@ -336,6 +336,7 @@ (define_code_iterator any_lshift [lshiftrt ashift]) ; logic shift (define_code_iterator piaop [plus ior and]) +(define_code_iterator pixop [plus ior xor]) (define_code_iterator bitop [xor ior and]) (define_code_iterator xior [xor ior]) (define_code_iterator eqne [eq ne]) @@ -3337,8 +3338,18 @@ (match_operand:SI 2 "nonmemory_operand" ""))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL" + "AVR_HAVE_MUL + || (avropt_pr118012 + /* AVR_TINY passes args on the stack, so we cannot work + around PR118012 like this. */ + && ! AVR_TINY)" { + if (! AVR_HAVE_MUL) + { + emit (gen_gen_mulsi3_pr118012 (operands[0], operands[1], operands[2])); + DONE; + } + if (u16_operand (operands[2], SImode)) { operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode)); @@ -3358,6 +3369,26 @@ DONE; }) +;; With PR118012, we do __mulsi3 as a transparent call, so insn combine +;; can transform (mult:SI (and:SI * (const_int 1))) into something +;; less toxic. +(define_expand "gen_mulsi3_pr118012" + [(parallel [(set (match_operand:SI 0 "register_operand") + (mult:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand"))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:DI 18))])] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! 
AVR_TINY" + { + operands[2] = force_reg (SImode, operands[2]); + if (avr_emit3_fix_outputs (gen_gen_mulsi3_pr118012, operands, 1 << 0, + regmask (DImode, 18) | regmask (HImode, 26) | regmask (HImode, 30))) + DONE; + }) + (define_insn_and_split "*mulsi3" [(set (match_operand:SI 0 "pseudo_register_operand" "=r") (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") @@ -3393,6 +3424,33 @@ } }) +(define_insn_and_split "*mulsi3_pr118012" + [(set (match_operand:SI 0 "pseudo_register_operand" "=r") + (mult:SI (match_operand:SI 1 "pseudo_register_operand" "r") + (match_operand:SI 2 "pseudo_register_operand" "r"))) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:DI 18))] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! AVR_TINY + && ! reload_completed" + { gcc_unreachable(); } + "&& 1" + [(set (reg:SI 18) + (match_dup 1)) + (set (reg:SI 22) + (match_dup 2)) + (parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))]) + (set (match_dup 0) + (reg:SI 22))]) + + ;; "muluqisi3" ;; "muluhisi3" (define_expand "mulu<mode>si3" @@ -3658,6 +3716,26 @@ (clobber (reg:HI 26)) (clobber (reg:CC REG_CC))])]) +(define_insn_and_split "*mulsi3_call_pr118012_split" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30))] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! 
AVR_TINY" + "#" + "&& reload_completed" + [(parallel [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:CC REG_CC))])]) + (define_insn "*mulsi3_call" [(set (reg:SI 22) (mult:SI (reg:SI 22) @@ -3668,6 +3746,21 @@ "%~call __mulsi3" [(set_attr "type" "xcall")]) +(define_insn "*mulsi3_call_pr118012" + [(set (reg:SI 22) + (mult:SI (reg:SI 22) + (reg:SI 18))) + (clobber (reg:SI 18)) + (clobber (reg:HI 26)) + (clobber (reg:HI 30)) + (clobber (reg:CC REG_CC))] + "avropt_pr118012 + && ! AVR_HAVE_MUL + && ! AVR_TINY + && reload_completed" + "%~call __mulsi3" + [(set_attr "type" "xcall")]) + ;; "*mulhisi3_call" ;; "*umulhisi3_call" (define_insn_and_split "*<extend_u>mulhisi3_call_split" @@ -7508,7 +7601,7 @@ ;; Combine will create zero-extract patterns for single-bit tests. ;; Permit any mode in source pattern by using VOIDmode. -(define_insn_and_split "*sbrx_branch<mode>_split" +(define_insn_and_split "sbrx_branch<mode>_split" [(set (pc) (if_then_else (match_operator 0 "eqne_operator" @@ -8975,8 +9068,8 @@ [(set (pc) (if_then_else (ge (match_operand:QI 0 "register_operand" "") (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc)))] + (label_ref (match_operand 1 "" "")) + (pc)))] "" "#" "reload_completed" @@ -10307,9 +10400,9 @@ (define_insn_and_split "*extzv.qihi1" [(set (match_operand:HI 0 "register_operand" "=r") - (zero_extract:HI (match_operand:QI 1 "register_operand" "r") + (zero_extract:HI (match_operand:QIHI 1 "register_operand" "r") (const_int 1) - (match_operand:QI 2 "const_0_to_7_operand" "n")))] + (match_operand:QI 2 "const_0_to_<MSB>_operand" "n")))] "" "#" "" @@ -10532,6 +10625,289 @@ (match_dup 2)))]) +(define_insn_and_split "*sextr.<QISI:mode>.<QISI2:mode>_split" + [(set (match_operand:QISI 0 "register_operand" "=r") + (sign_extract:QISI (match_operand:QISI2 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const0_operand" "L")))] + "" + "#" + 
"&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extract:QISI (match_dup 1) + (const_int 1) + (match_dup 2))) + (clobber (reg:CC REG_CC))])]) + +(define_insn "*sextr.<QISI:mode>.<QISI2:mode>" + [(set (match_operand:QISI 0 "register_operand" "=r") + (sign_extract:QISI (match_operand:QISI2 1 "register_operand" "r") + (const_int 1) + (match_operand:QI 2 "const0_operand" "L"))) + (clobber (reg:CC REG_CC))] + "reload_completed" + { + return avr_out_sextr (insn, operands, NULL); + } + [(set_attr "adjust_len" "sextr")]) + + +(define_insn_and_split "*neg.zextr-to-sextr.<HISI:mode>.<QISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (neg:HISI (zero_extend:HISI + (zero_extract:QIPSI (match_operand:QISI 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const0_operand")))))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(set (match_dup 0) + (sign_extract:HISI (match_dup 1) + (const_int 1) + (match_dup 2)))]) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; PR118012: match.pd's +;; +;; /* (zero_one == 0) ? y : z <op> y -> ((typeof(y))zero_one * z) <op> y */ +;; /* (zero_one != 0) ? z <op> y : y -> ((typeof(y))zero_one * z) <op> y */ +;; +;; introduces a crazy "optimization" that transforms code like +;; +;; if (b & 1) +;; c ^= a; +;; to +;; +;; u = extract_bit0 (b); +;; v = zero_extend (u); +;; w = NEG v; +;; x = a AND w +;; c ^= x +;; +;; or even to +;; +;; u = extract_bit0 (b); +;; v = a MULT u +;; c ^= v +;; +;; even on machines that don't have MUL instructions or that +;; have to perform the multiplication by means of a libgcc call. +;; Try to fix that below. Notice that on AVR_TINY no MUL insn is +;; available since it is performed as a libgcc call from which we +;; cannot roll back. With !AVR_HAVE_MUL it's a transparent call +;; from avr.md so we can get rid of that at least. 
+ +;; Map +;; $0 = ((sign_extract ($1.0)) AND $3) <op> $4 +;; to +;; $0 = $4 +;; if ($1.0 == 0) +;; goto L +;; $0 <op>= $3 +;; L:; +(define_insn_and_split "*pixop-to-skip.<QISI:mode>" + [(set (match_operand:QISI 0 "register_operand") + (pixop:QISI (and:QISI (sign_extract:QISI (match_operand:QISI2 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const0_operand")) + (match_operand:QISI 3 "nonmemory_operand")) + (match_operand:QISI 4 "register_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<pixop:CODE>, operands[0], operands[4], operands[3], + EQ, operands[1], 0); + DONE; + }) + +;; Map +;; $0 = (($1 AND 1) MULT $2) o $3 +;; to +;; $0 = $3 +;; if ($1.0 == 0) +;; goto L +;; $0 o= $2 +;; L:; +(define_insn_and_split "*mul.and1-to-skip.<mode>" + [(set (match_operand:QISI 0 "register_operand") + (pixop:QISI (mult:QISI (and:QISI (match_operand:QISI 1 "register_operand") + (const_int 1)) + (match_operand:QISI 2 "nonmemory_operand")) + (match_operand:QISI 3 "register_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<CODE>, operands[0], operands[3], operands[2], + EQ, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*mul.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (pixop:HISI (mult:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:HISI 2 "nonmemory_operand")) + (match_operand:HISI 3 "register_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<pixop:CODE>, operands[0], operands[3], operands[2], + EQ, operands[1], 0); + DONE; + }) + +;; Like the one above, but where $2 was a power of 2 and MULT has been +;; transformed to ASHIFT (PR118360). 
+(define_insn_and_split "*shl.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (pixop:HISI (ashift:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:QI 2 "const_int_operand")) + (match_operand:HISI 3 "register_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + rtx op2 = gen_int_mode (1u << INTVAL (operands[2]), <HISI:MODE>mode); + avr_emit_skip_pixop (<pixop:CODE>, operands[0], operands[3], op2, + EQ, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*shl.and-to-skip.<mode>" + [(set (match_operand:HISI 0 "register_operand") + (pixop:HISI (and:HISI (ashift:HISI (match_operand:HISI 1 "register_operand") + (match_operand:QI 4 "const_0_to_<MSB>_operand")) + (match_operand:HISI 2 "single_one_operand")) + (match_operand:HISI 3 "register_operand")))] + "avropt_pr118012 + && exact_log2 (UINTVAL (operands[2]) & GET_MODE_MASK (<MODE>mode)) + == INTVAL (operands[4]) + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_pixop (<CODE>, operands[0], operands[3], operands[2], + EQ, operands[1], 0); + DONE; + }) + + +;; Map +;; $0 = ($1 AND 1) MULT $2 +;; to +;; $0 = $2 +;; if ($1.0 != 0) +;; goto L +;; $0 = 0 +;; L:; +(define_insn_and_split "*map.mul.and1-to-skip.<QISI:mode>" + [(set (match_operand:QISI 0 "register_operand") + (mult:QISI (and:QISI (match_operand:QISI2 1 "register_operand") + (const_int 1)) + (match_operand:QISI 2 "nonmemory_operand")))] + "avropt_pr118012 + && ! 
reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[2], NE, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*map.mul.and1-to-skip.<mode>" + [(set (match_operand:QISI 0 "register_operand") + (mult:QISI (and:QISI (match_operand:QISI 1 "register_operand") + (const_int 1)) + (match_operand:QISI 2 "nonmemory_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[2], NE, operands[1], 0); + DONE; + }) + +(define_insn_and_split "*map.mul.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (mult:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:HISI 2 "nonmemory_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[2], NE, operands[1], 0); + DONE; + }) + +;; Similar, but the MULT has been turned to ASHIFT. +(define_insn_and_split "*map.shl.ext.and1-to-skip.<HISI:mode>" + [(set (match_operand:HISI 0 "register_operand") + (ashift:HISI (any_extend:HISI (and:QIPSI (match_operand:QIPSI 1 "register_operand") + (const_int 1))) + (match_operand:QI 2 "const_0_to_<HISI:MSB>_operand")))] + "avropt_pr118012 + && <HISI:SIZE> > <QIPSI:SIZE> + && ! 
reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + rtx op2 = gen_int_mode (1u << INTVAL (operands[2]), <HISI:MODE>mode); + avr_emit_skip_clear (operands[0], op2, NE, operands[1], 0); + DONE; + }) + + +;; Map +;; $0 = sign_extract($1.0) AND $3 +;; to +;; $0 = $3 +;; if ($1.0 != 0) +;; goto L +;; $0 = 0 +;; L:; +(define_insn_and_split "*map.and1-to-skip.<QISI:mode>" + [(set (match_operand:QISI 0 "register_operand") + (and:QISI (sign_extract:QISI (match_operand:QISI2 1 "register_operand") + (const_int 1) + (match_operand:QI 2 "const0_operand")) + (match_operand:QISI 3 "nonmemory_operand")))] + "avropt_pr118012 + && ! reload_completed" + { gcc_unreachable (); } + "&& 1" + [(scratch)] + { + avr_emit_skip_clear (operands[0], operands[3], NE, operands[1], 0); + DONE; + }) + + ;; Work around PR115307: Early passes expand isinf/f/l to a bloat. ;; These passes do not consider costs, and there is no way to ;; hook in or otherwise disable the generated bloat. |