diff options
author | Enkovich Ilya <ilya.enkovich@intel.com> | 2011-09-08 13:41:27 +0000 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2011-09-08 06:41:27 -0700 |
commit | d275ab8b8056a692e67d0f1a46bc766762f62806 (patch) | |
tree | 7ab64f0e1c0d2389927e770b85b9d813bf850f3e /gcc | |
parent | 868141900a1c1c7a76c5a48625dbfa1e81d8051c (diff) | |
download | gcc-d275ab8b8056a692e67d0f1a46bc766762f62806.zip gcc-d275ab8b8056a692e67d0f1a46bc766762f62806.tar.gz gcc-d275ab8b8056a692e67d0f1a46bc766762f62806.tar.bz2 |
Improve AGU stalls avoidance optimization.
2011-09-08 Enkovich Ilya <ilya.enkovich@intel.com>
* config/i386/i386-protos.h (ix86_lea_outperforms): New.
(ix86_avoid_lea_for_add): Likewise.
(ix86_avoid_lea_for_addr): Likewise.
(ix86_split_lea_for_addr): Likewise.
* config/i386/i386.c (LEA_MAX_STALL): New.
(increase_distance): Likewise.
(insn_defines_reg): Likewise.
(insn_uses_reg_mem): Likewise.
(distance_non_agu_define_in_bb): Likewise.
(distance_agu_use_in_bb): Likewise.
(ix86_lea_outperforms): Likewise.
(ix86_ok_to_clobber_flags): Likewise.
(ix86_avoid_lea_for_add): Likewise.
(ix86_avoid_lea_for_addr): Likewise.
(ix86_split_lea_for_addr): Likewise.
(distance_non_agu_define): Search in pred BBs added.
(distance_agu_use): Search in succ BBs added.
(IX86_LEA_PRIORITY): Value changed from 2 to 0.
(LEA_SEARCH_THRESHOLD): Now depends on LEA_MAX_STALL.
(ix86_lea_for_add_ok): Use ix86_lea_outperforms to make decision.
* config/i386/i386.md: Split added to transform non destructive
add into move and add.
(lea_1): transformed into insn_and_split to avoid AGU stalls.
(lea<mode>_2): Likewise.
From-SVN: r178689
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 29 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 670 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 27 |
4 files changed, 598 insertions, 133 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 938aede..a0a89bb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2011-09-08 Enkovich Ilya <ilya.enkovich@intel.com> + + * config/i386/i386-protos.h (ix86_lea_outperforms): New. + (ix86_avoid_lea_for_add): Likewise. + (ix86_avoid_lea_for_addr): Likewise. + (ix86_split_lea_for_addr): Likewise. + + * config/i386/i386.c (LEA_MAX_STALL): New. + (increase_distance): Likewise. + (insn_defines_reg): Likewise. + (insn_uses_reg_mem): Likewise. + (distance_non_agu_define_in_bb): Likewise. + (distance_agu_use_in_bb): Likewise. + (ix86_lea_outperforms): Likewise. + (ix86_ok_to_clobber_flags): Likewise. + (ix86_avoid_lea_for_add): Likewise. + (ix86_avoid_lea_for_addr): Likewise. + (ix86_split_lea_for_addr): Likewise. + (distance_non_agu_define): Search in pred BBs added. + (distance_agu_use): Search in succ BBs added. + (IX86_LEA_PRIORITY): Value changed from 2 to 0. + (LEA_SEARCH_THRESHOLD): Now depends on LEA_MAX_STALL. + (ix86_lea_for_add_ok): Use ix86_lea_outperforms to make decision. + + * config/i386/i386.md: Split added to transform non destructive + add into move and add. + (lea_1): transformed into insn_and_split to avoid AGU stalls. + (lea<mode>_2): Likewise. + 2011-09-08 Martin Jambor <mjambor@suse.cz> PR tree-optimization/50287 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 7deeae7..900d1c5 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -90,6 +90,11 @@ extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, extern void ix86_expand_binary_operator (enum rtx_code, enum machine_mode, rtx[]); extern bool ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); +extern bool ix86_lea_outperforms (rtx, unsigned int, unsigned int, + unsigned int, unsigned int); +extern bool ix86_avoid_lea_for_add (rtx, rtx[]); +extern bool ix86_avoid_lea_for_addr (rtx, rtx[]); +extern void ix86_split_lea_for_addr (rtx[], enum machine_mode); extern bool ix86_lea_for_add_ok (rtx, rtx[]); extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high); extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7bdab73..64deb77 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15969,12 +15969,125 @@ ix86_split_idivmod (enum machine_mode mode, rtx operands[], emit_label (end_label); } -#define LEA_SEARCH_THRESHOLD 12 +#define LEA_MAX_STALL (3) +#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) + +/* Increase given DISTANCE in half-cycles according to + dependencies between PREV and NEXT instructions. + Add 1 half-cycle if there is no dependency and + go to next cycle if there is some dependecy. */ + +static unsigned int +increase_distance (rtx prev, rtx next, unsigned int distance) +{ + df_ref *use_rec; + df_ref *def_rec; + + if (!prev || !next) + return distance + (distance & 1) + 2; + + if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) + return distance + 1; + + for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) + for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) + if (!DF_REF_IS_ARTIFICIAL (*def_rec) + && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec)) + return distance + (distance & 1) + 2; + + return distance + 1; +} + +/* Function checks if instruction INSN defines register number + REGNO1 or REGNO2. */ + +static bool +insn_defines_reg (unsigned int regno1, unsigned int regno2, + rtx insn) +{ + df_ref *def_rec; + + for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++) + if (DF_REF_REG_DEF_P (*def_rec) + && !DF_REF_IS_ARTIFICIAL (*def_rec) + && (regno1 == DF_REF_REGNO (*def_rec) + || regno2 == DF_REF_REGNO (*def_rec))) + { + return true; + } + + return false; +} + +/* Function checks if instruction INSN uses register number + REGNO as a part of address expression. */ + +static bool +insn_uses_reg_mem (unsigned int regno, rtx insn) +{ + df_ref *use_rec; + + for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++) + if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec)) + return true; + + return false; +} + +/* Search backward for non-agu definition of register number REGNO1 + or register number REGNO2 in basic block starting from instruction + START up to head of basic block or instruction INSN. + + Function puts true value into *FOUND var if definition was found + and false otherwise. + + Distance in half-cycles between START and found instruction or head + of BB is added to DISTANCE and returned. */ + +static int +distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, + rtx insn, int distance, + rtx start, bool *found) +{ + basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL; + rtx prev = start; + rtx next = NULL; + enum attr_type insn_type; + + *found = false; + + while (prev + && prev != insn + && distance < LEA_SEARCH_THRESHOLD) + { + if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) + { + distance = increase_distance (prev, next, distance); + if (insn_defines_reg (regno1, regno2, prev)) + { + insn_type = get_attr_type (prev); + if (insn_type != TYPE_LEA) + { + *found = true; + return distance; + } + } + + next = prev; + } + if (prev == BB_HEAD (bb)) + break; + + prev = PREV_INSN (prev); + } + + return distance; +} /* Search backward for non-agu definition of register number REGNO1 or register number REGNO2 in INSN's basic block until 1. Pass LEA_SEARCH_THRESHOLD instructions, or - 2. Reach BB boundary, or + 2. Reach neighbour BBs boundary, or 3. Reach agu definition. Returns the distance between the non-agu definition point and INSN. If no definition point, returns -1. */ @@ -15985,35 +16098,14 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, { basic_block bb = BLOCK_FOR_INSN (insn); int distance = 0; - df_ref *def_rec; - enum attr_type insn_type; + bool found = false; if (insn != BB_HEAD (bb)) - { - rtx prev = PREV_INSN (insn); - while (prev && distance < LEA_SEARCH_THRESHOLD) - { - if (NONDEBUG_INSN_P (prev)) - { - distance++; - for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { - insn_type = get_attr_type (prev); - if (insn_type != TYPE_LEA) - goto done; - } - } - if (prev == BB_HEAD (bb)) - break; - prev = PREV_INSN (prev); - } - } + distance = distance_non_agu_define_in_bb (regno1, regno2, insn, + distance, PREV_INSN (insn), + &found); - if (distance < LEA_SEARCH_THRESHOLD) + if (!found && distance < LEA_SEARCH_THRESHOLD) { edge e; edge_iterator ei; @@ -16027,88 +16119,121 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, } if (simple_loop) + distance = distance_non_agu_define_in_bb (regno1, regno2, + insn, distance, + BB_END (bb), &found); + else { - rtx prev = BB_END (bb); - while (prev - && prev != insn - && distance < LEA_SEARCH_THRESHOLD) + int shortest_dist = -1; + bool found_in_bb = false; + + FOR_EACH_EDGE (e, ei, bb->preds) { - if (NONDEBUG_INSN_P (prev)) + int bb_dist = distance_non_agu_define_in_bb (regno1, regno2, + insn, distance, + BB_END (e->src), + &found_in_bb); + if (found_in_bb) { - distance++; - for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && (regno1 == DF_REF_REGNO (*def_rec) - || regno2 == DF_REF_REGNO (*def_rec))) - { - insn_type = get_attr_type (prev); - if (insn_type != TYPE_LEA) - goto done; - } + if (shortest_dist < 0) + shortest_dist = bb_dist; + else if (bb_dist > 0) + shortest_dist = MIN (bb_dist, shortest_dist); } - prev = PREV_INSN (prev); + + found = found || found_in_bb; } + + distance = shortest_dist; } } - distance = -1; - -done: /* get_attr_type may modify recog data. We want to make sure that recog data is valid for instruction INSN, on which distance_non_agu_define is called. INSN is unchanged here. */ extract_insn_cached (insn); + + if (!found) + distance = -1; + else + distance = distance >> 1; + return distance; } -/* Return the distance between INSN and the next insn that uses - register number REGNO0 in memory address. Return -1 if no such - a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ +/* Return the distance in half-cycles between INSN and the next + insn that uses register number REGNO in memory address added + to DISTANCE. Return -1 if REGNO0 is set. + + Put true value into *FOUND if register usage was found and + false otherwise. + Put true value into *REDEFINED if register redefinition was + found and false otherwise. */ static int -distance_agu_use (unsigned int regno0, rtx insn) +distance_agu_use_in_bb(unsigned int regno, + rtx insn, int distance, rtx start, + bool *found, bool *redefined) { - basic_block bb = BLOCK_FOR_INSN (insn); - int distance = 0; - df_ref *def_rec; - df_ref *use_rec; + basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL; + rtx next = start; + rtx prev = NULL; - if (insn != BB_END (bb)) + *found = false; + *redefined = false; + + while (next + && next != insn + && distance < LEA_SEARCH_THRESHOLD) { - rtx next = NEXT_INSN (insn); - while (next && distance < LEA_SEARCH_THRESHOLD) + if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) { - if (NONDEBUG_INSN_P (next)) + distance = increase_distance(prev, next, distance); + if (insn_uses_reg_mem (regno, next)) { - distance++; - - for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) - if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD - || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) - && regno0 == DF_REF_REGNO (*use_rec)) - { - /* Return DISTANCE if OP0 is used in memory - address in NEXT. */ - return distance; - } + /* Return DISTANCE if OP0 is used in memory + address in NEXT. */ + *found = true; + return distance; + } - for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && regno0 == DF_REF_REGNO (*def_rec)) - { - /* Return -1 if OP0 is set in NEXT. */ - return -1; - } + if (insn_defines_reg (regno, INVALID_REGNUM, next)) + { + /* Return -1 if OP0 is set in NEXT. */ + *redefined = true; + return -1; } - if (next == BB_END (bb)) - break; - next = NEXT_INSN (next); + + prev = next; } + + if (next == BB_END (bb)) + break; + + next = NEXT_INSN (next); } - if (distance < LEA_SEARCH_THRESHOLD) + return distance; +} + +/* Return the distance between INSN and the next insn that uses + register number REGNO0 in memory address. Return -1 if no such + a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ + +static int +distance_agu_use (unsigned int regno0, rtx insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + int distance = 0; + bool found = false; + bool redefined = false; + + if (insn != BB_END (bb)) + distance = distance_agu_use_in_bb (regno0, insn, distance, + NEXT_INSN (insn), + &found, &redefined); + + if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) { edge e; edge_iterator ei; @@ -16122,42 +16247,41 @@ distance_agu_use (unsigned int regno0, rtx insn) } if (simple_loop) + distance = distance_agu_use_in_bb (regno0, insn, + distance, BB_HEAD (bb), + &found, &redefined); + else { - rtx next = BB_HEAD (bb); - while (next - && next != insn - && distance < LEA_SEARCH_THRESHOLD) + int shortest_dist = -1; + bool found_in_bb = false; + bool redefined_in_bb = false; + + FOR_EACH_EDGE (e, ei, bb->succs) { - if (NONDEBUG_INSN_P (next)) + int bb_dist = distance_agu_use_in_bb (regno0, insn, + distance, BB_HEAD (e->dest), + &found_in_bb, &redefined_in_bb); + if (found_in_bb) { - distance++; - - for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) - if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD - || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) - && regno0 == DF_REF_REGNO (*use_rec)) - { - /* Return DISTANCE if OP0 is used in memory - address in NEXT. */ - return distance; - } - - for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) - if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF - && !DF_REF_IS_ARTIFICIAL (*def_rec) - && regno0 == DF_REF_REGNO (*def_rec)) - { - /* Return -1 if OP0 is set in NEXT. */ - return -1; - } - + if (shortest_dist < 0) + shortest_dist = bb_dist; + else if (bb_dist > 0) + shortest_dist = MIN (bb_dist, shortest_dist); } - next = NEXT_INSN (next); + + found = found || found_in_bb; } + + distance = shortest_dist; } } - return -1; + if (!found || redefined) + distance = -1; + else + distance = distance >> 1; + + return distance; } /* Define this macro to tune LEA priority vs ADD, it take effect when @@ -16165,7 +16289,309 @@ distance_agu_use (unsigned int regno0, rtx insn) Negative value: ADD is more preferred than LEA Zero: Netrual Positive value: LEA is more preferred than ADD*/ -#define IX86_LEA_PRIORITY 2 +#define IX86_LEA_PRIORITY 0 + +/* Return true if usage of lea INSN has performance advantage + over a sequence of instructions. Instructions sequence has + SPLIT_COST cycles higher latency than lea latency. */ + +bool +ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1, + unsigned int regno2, unsigned int split_cost) +{ + int dist_define, dist_use; + + dist_define = distance_non_agu_define (regno1, regno2, insn); + dist_use = distance_agu_use (regno0, insn); + + if (dist_define < 0 || dist_define >= LEA_MAX_STALL) + { + /* If there is no non AGU operand definition, no AGU + operand usage and split cost is 0 then both lea + and non lea variants have same priority. Currently + we prefer lea for 64 bit code and non lea on 32 bit + code. */ + if (dist_use < 0 && split_cost == 0) + return TARGET_64BIT || IX86_LEA_PRIORITY; + else + return true; + } + + /* With longer definitions distance lea is more preferable. + Here we change it to take into account splitting cost and + lea priority. */ + dist_define += split_cost + IX86_LEA_PRIORITY; + + /* If there is no use in memory addess then we just check + that split cost does not exceed AGU stall. */ + if (dist_use < 0) + return dist_define >= LEA_MAX_STALL; + + /* If this insn has both backward non-agu dependence and forward + agu dependence, the one with short distance takes effect. */ + return dist_define >= dist_use; +} + +/* Return true if it is legal to clobber flags by INSN and + false otherwise. */ + +static bool +ix86_ok_to_clobber_flags(rtx insn) +{ + basic_block bb = BLOCK_FOR_INSN (insn); + df_ref *use; + bitmap live; + + while (insn) + { + if (NONDEBUG_INSN_P (insn)) + { + for (use = DF_INSN_USES (insn); *use; use++) + if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG) + return false; + + if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) + return true; + } + + if (insn == BB_END (bb)) + break; + + insn = NEXT_INSN (insn); + } + + live = df_get_live_out(bb); + return !REGNO_REG_SET_P (live, FLAGS_REG); +} + +/* Return true if we need to split op0 = op1 + op2 into a sequence of + move and add to avoid AGU stalls. */ + +bool +ix86_avoid_lea_for_add (rtx insn, rtx operands[]) +{ + unsigned int regno0 = true_regnum (operands[0]); + unsigned int regno1 = true_regnum (operands[1]); + unsigned int regno2 = true_regnum (operands[2]); + + /* Check if we need to optimize. */ + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return false; + + /* Check it is correct to split here. */ + if (!ix86_ok_to_clobber_flags(insn)) + return false; + + /* We need to split only adds with non destructive + destination operand. */ + if (regno0 == regno1 || regno0 == regno2) + return false; + else + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1); +} + +/* Return true if we need to split lea into a sequence of + instructions to avoid AGU stalls. */ + +bool +ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) +{ + unsigned int regno0 = true_regnum (operands[0]) ; + unsigned int regno1 = -1; + unsigned int regno2 = -1; + unsigned int split_cost = 0; + struct ix86_address parts; + int ok; + + /* Check we need to optimize. */ + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) + return false; + + /* Check it is correct to split here. */ + if (!ix86_ok_to_clobber_flags(insn)) + return false; + + ok = ix86_decompose_address (operands[1], &parts); + gcc_assert (ok); + + /* We should not split into add if non legitimate pic + operand is used as displacement. */ + if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) + return false; + + if (parts.base) + regno1 = true_regnum (parts.base); + if (parts.index) + regno2 = true_regnum (parts.index); + + /* Compute how many cycles we will add to execution time + if split lea into a sequence of instructions. */ + if (parts.base || parts.index) + { + /* Have to use mov instruction if non desctructive + destination form is used. */ + if (regno1 != regno0 && regno2 != regno0) + split_cost += 1; + + /* Have to add index to base if both exist. */ + if (parts.base && parts.index) + split_cost += 1; + + /* Have to use shift and adds if scale is 2 or greater. */ + if (parts.scale > 1) + { + if (regno0 != regno1) + split_cost += 1; + else if (regno2 == regno0) + split_cost += 4; + else + split_cost += parts.scale; + } + + /* Have to use add instruction with immediate if + disp is non zero. */ + if (parts.disp && parts.disp != const0_rtx) + split_cost += 1; + + /* Subtract the price of lea. */ + split_cost -= 1; + } + + return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost); +} + +/* Split lea instructions into a sequence of instructions + which are executed on ALU to avoid AGU stalls. + It is assumed that it is allowed to clobber flags register + at lea position. */ + +extern void +ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode) +{ + unsigned int regno0 = true_regnum (operands[0]) ; + unsigned int regno1 = INVALID_REGNUM; + unsigned int regno2 = INVALID_REGNUM; + struct ix86_address parts; + rtx tmp, clob; + rtvec par; + int ok, adds; + + ok = ix86_decompose_address (operands[1], &parts); + gcc_assert (ok); + + if (parts.base) + { + if (GET_MODE (parts.base) != mode) + parts.base = gen_rtx_SUBREG (mode, parts.base, 0); + regno1 = true_regnum (parts.base); + } + + if (parts.index) + { + if (GET_MODE (parts.index) != mode) + parts.index = gen_rtx_SUBREG (mode, parts.index, 0); + regno2 = true_regnum (parts.index); + } + + if (parts.scale > 1) + { + /* Case r1 = r1 + ... */ + if (regno1 == regno0) + { + /* If we have a case r1 = r1 + C * r1 then we + should use multiplication which is very + expensive. Assume cost model is wrong if we + have such case here. */ + gcc_assert (regno2 != regno0); + + for (adds = parts.scale; adds > 0; adds--) + { + tmp = gen_rtx_PLUS (mode, operands[0], parts.index); + tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); + clob = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (2, tmp, clob); + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + } + } + else + { + /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ + if (regno0 != regno2) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); + + /* Use shift for scaling. */ + tmp = gen_rtx_ASHIFT (mode, operands[0], + GEN_INT (exact_log2 (parts.scale))); + tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (2, tmp, clob); + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + + if (parts.base) + { + tmp = gen_rtx_PLUS (mode, operands[0], parts.base); + tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (2, tmp, clob); + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + } + + if (parts.disp && parts.disp != const0_rtx) + { + tmp = gen_rtx_PLUS (mode, operands[0], parts.disp); + tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (2, tmp, clob); + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + } + } + } + else if (!parts.base && !parts.index) + { + gcc_assert(parts.disp); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp)); + } + else + { + if (!parts.base) + { + if (regno0 != regno2) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); + } + else if (!parts.index) + { + if (regno0 != regno1) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); + } + else + { + if (regno0 == regno1) + tmp = gen_rtx_PLUS (mode, operands[0], parts.index); + else if (regno0 == regno2) + tmp = gen_rtx_PLUS (mode, operands[0], parts.base); + else + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); + tmp = gen_rtx_PLUS (mode, operands[0], parts.index); + } + + tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (2, tmp, clob); + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + } + + if (parts.disp && parts.disp != const0_rtx) + { + tmp = gen_rtx_PLUS (mode, operands[0], parts.disp); + tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + par = gen_rtvec (2, tmp, clob); + emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + } + } +} /* Return true if it is ok to optimize an ADD operation to LEA operation to avoid flag register consumation. For most processors, @@ -16186,26 +16612,8 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[]) if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) return false; - else - { - int dist_define, dist_use; - /* Return false if REGNO0 isn't used in memory address. */ - dist_use = distance_agu_use (regno0, insn); - if (dist_use <= 0) - return false; - - dist_define = distance_non_agu_define (regno1, regno2, insn); - if (dist_define <= 0) - return true; - - /* If this insn has both backward non-agu dependence and forward - agu dependence, the one with short distance take effect. */ - if ((dist_define + IX86_LEA_PRIORITY) < dist_use) - return false; - - return true; - } + return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0); } /* Return true if destination reg of SET_BODY is shift count of diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 37491ae..52c57fa 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5463,19 +5463,31 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) -(define_insn "*lea_1" +(define_insn_and_split "*lea_1" [(set (match_operand:SI 0 "register_operand" "=r") (subreg:SI (match_operand:DI 1 "lea_address_operand" "p") 0))] "TARGET_64BIT" "lea{l}\t{%a1, %0|%0, %a1}" + "&& reload_completed && ix86_avoid_lea_for_addr (insn, operands)" + [(const_int 0)] +{ + ix86_split_lea_for_addr (operands, SImode); + DONE; +} [(set_attr "type" "lea") (set_attr "mode" "SI")]) -(define_insn "*lea<mode>_2" +(define_insn_and_split "*lea<mode>_2" [(set (match_operand:SWI48 0 "register_operand" "=r") (match_operand:SWI48 1 "lea_address_operand" "p"))] "" "lea{<imodesuffix>}\t{%a1, %0|%0, %a1}" + "reload_completed && ix86_avoid_lea_for_addr (insn, operands)" + [(const_int 0)] +{ + ix86_split_lea_for_addr (operands, <MODE>mode); + DONE; +} [(set_attr "type" "lea") (set_attr "mode" "<MODE>")]) @@ -5777,6 +5789,17 @@ (const_string "none"))) (set_attr "mode" "QI")]) +;; Split non destructive adds if we cannot use lea. +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (plus:SWI48 (match_operand:SWI48 1 "register_operand" "") + (match_operand:SWI48 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_avoid_lea_for_add (insn, operands)" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (match_dup 0) (plus:<MODE> (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) + ;; Convert add to the lea pattern to avoid flags dependency. (define_split [(set (match_operand:SWI 0 "register_operand" "") |