-rw-r--r-- | gcc/Makefile.in                                      |   1
-rw-r--r-- | gcc/common.opt                                       |   4
-rw-r--r-- | gcc/doc/invoke.texi                                  |   8
-rw-r--r-- | gcc/fold-mem-offsets.cc                              | 901
-rw-r--r-- | gcc/passes.def                                       |   1
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr52146.c              |   2
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/fold-mem-offsets-1.c  |  16
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/fold-mem-offsets-2.c  |  24
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/fold-mem-offsets-3.c  |  17
-rw-r--r-- | gcc/tree-pass.h                                      |   1
10 files changed, 974 insertions, 1 deletion
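Before the patch body, a rough sketch of the transformation the new pass performs. The C function below is modelled on the new RISC-V tests included in the patch; the RISC-V sequences in the comment are illustrative only (register names and offsets are assumptions, following the example in the pass's own header comment), not the output of any particular compiler build.

/* Illustrative only: a stack array indexed at runtime.  Earlier passes
   typically leave an add-immediate (the array's offset from sp) feeding
   the address of the load and store.  */
void sink (int arr[2]);

void
foo (int a, int b, int i)
{
  int arr[2] = {a, b};
  arr[i]++;
  sink (arr);
}

/* Hypothetical before/after for the arr[i]++ access:

     before:  slli a5,a2,2          after:  slli a5,a2,2
              addi a4,sp,16                 add  a5,a5,sp
              add  a5,a5,a4                 lw   a0,16(a5)
              lw   a0,0(a5)                 addi a0,a0,1
              addi a0,a0,1                  sw   a0,16(a5)
              sw   a0,0(a5)

   The addi that materialised the stack offset is gone; its constant has
   been folded into the lw/sw displacements.  */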
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9cc1626..747f749 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1443,6 +1443,7 @@ OBJS = \ fixed-value.o \ fold-const.o \ fold-const-call.o \ + fold-mem-offsets.o \ function.o \ function-abi.o \ function-tests.o \ diff --git a/gcc/common.opt b/gcc/common.opt index f137a1f..b103b8d 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1252,6 +1252,10 @@ fcprop-registers Common Var(flag_cprop_registers) Optimization Perform a register copy-propagation optimization pass. +ffold-mem-offsets +Target Bool Var(flag_fold_mem_offsets) Init(1) +Fold instructions calculating memory offsets to the memory access instruction if possible. + fcrossjumping Common Var(flag_crossjumping) Optimization Perform cross-jumping optimization. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index eb714d1..a26dc5a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -543,6 +543,7 @@ Objective-C and Objective-C++ Dialects}. -fauto-inc-dec -fbranch-probabilities -fcaller-saves -fcombine-stack-adjustments -fconserve-stack +-ffold-mem-offsets -fcompare-elim -fcprop-registers -fcrossjumping -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules -fcx-limited-range @@ -14395,6 +14396,13 @@ the comparison operation before register allocation is complete. Enabled at levels @option{-O1}, @option{-O2}, @option{-O3}, @option{-Os}. +@opindex ffold-mem-offsets +@item -ffold-mem-offsets +@itemx -fno-fold-mem-offsets +Try to eliminate add instructions by folding them in memory loads/stores. + +Enabled at levels @option{-O2}, @option{-O3}. + @opindex fcprop-registers @item -fcprop-registers After register allocation and post-register allocation instruction splitting, diff --git a/gcc/fold-mem-offsets.cc b/gcc/fold-mem-offsets.cc new file mode 100644 index 0000000..6263fc7 --- /dev/null +++ b/gcc/fold-mem-offsets.cc @@ -0,0 +1,901 @@ +/* Late RTL pass to fold memory offsets. + Copyright (C) 2023 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "tree.h" +#include "expr.h" +#include "backend.h" +#include "regs.h" +#include "target.h" +#include "memmodel.h" +#include "emit-rtl.h" +#include "insn-config.h" +#include "recog.h" +#include "predict.h" +#include "df.h" +#include "tree-pass.h" +#include "cfgrtl.h" + +/* This pass tries to optimize memory offset calculations by moving constants + from add instructions to the memory instructions (loads / stores). 
+ For example it can transform code like this: + + add t4, sp, 16 + add t2, a6, t4 + shl t3, t2, 1 + ld a2, 0(t3) + add a2, 1 + sd a2, 8(t2) + + into the following (one instruction less): + + add t2, a6, sp + shl t3, t2, 1 + ld a2, 32(t3) + add a2, 1 + sd a2, 24(t2) + + Although the previous passes try to emit efficient offset calculations + this pass is still beneficial because: + + - The mechanisms that optimize memory offsets usually work with specific + patterns or have limitations. This pass is designed to fold offsets + through complex calculations that affect multiple memory operations + and have partially overlapping calculations. + + - There are cases where add instructions are introduced in late rtl passes + and the rest of the pipeline cannot eliminate them. Arrays and structs + allocated on the stack can result in unwanted add instructions that + cannot be eliminated easily. + + This pass works on a basic block level and consists of 4 phases: + + - Phase 1 (Analysis): Find "foldable" instructions. + Foldable instructions are those that we know how to propagate + a constant addition through (add, shift, move, ...) and only have other + foldable instructions for uses. In that phase a DFS traversal on the + definition tree is performed and foldable instructions are marked on + a bitmap. The add immediate instructions that are reachable in this + DFS are candidates for folding since all the intermediate calculations + affected by them are also foldable. + + - Phase 2 (Validity): Traverse and calculate the offsets that would result + from folding the add immediate instructions. Check whether the + calculated offsets result in a valid instruction for the target. + + - Phase 3 (Commit offsets): Traverse again. It is now known which folds + are valid so at this point change the offsets in the memory instructions. + + - Phase 4 (Commit instruction deletions): Scan all instructions and delete + or simplify (reduce to move) all add immediate instructions that were + folded. + + This pass should run before hard register propagation because it creates + register moves that we expect to be eliminated. */ + +namespace { + +const pass_data pass_data_fold_mem = +{ + RTL_PASS, /* type */ + "fold_mem_offsets", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_fold_mem_offsets : public rtl_opt_pass +{ +public: + pass_fold_mem_offsets (gcc::context *ctxt) + : rtl_opt_pass (pass_data_fold_mem, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_fold_mem_offsets && optimize >= 2; + } + + virtual unsigned int execute (function *); +}; // class pass_fold_mem_offsets + +/* Class that holds in FOLD_INSNS the instructions that if folded the offset + of a memory instruction would increase by ADDED_OFFSET. */ +class fold_mem_info { +public: + auto_bitmap fold_insns; + HOST_WIDE_INT added_offset; +}; + +typedef hash_map<rtx_insn *, fold_mem_info *> fold_info_map; + +/* Tracks which instructions can be reached through instructions that can + propagate offsets for folding. */ +static bitmap_head can_fold_insns; + +/* Marks instructions that are currently eligible for folding. */ +static bitmap_head candidate_fold_insns; + +/* Tracks instructions that cannot be folded because it turned out that + folding will result in creating an invalid memory instruction. 
+ An instruction can be in both CANDIDATE_FOLD_INSNS and CANNOT_FOLD_INSNS + at the same time, in which case it is not legal to fold. */ +static bitmap_head cannot_fold_insns; + +/* The number of instructions that were simplified or eliminated. */ +static int stats_fold_count; + +/* Get the single reaching definition of an instruction inside a BB. + The definition is desired for REG used in INSN. + Return the definition insn or NULL if there's no definition with + the desired criteria. */ +static rtx_insn* +get_single_def_in_bb (rtx_insn *insn, rtx reg) +{ + df_ref use; + struct df_link *ref_chain, *ref_link; + + FOR_EACH_INSN_USE (use, insn) + { + if (GET_CODE (DF_REF_REG (use)) == SUBREG) + return NULL; + if (REGNO (DF_REF_REG (use)) == REGNO (reg)) + break; + } + + if (!use) + return NULL; + + ref_chain = DF_REF_CHAIN (use); + + if (!ref_chain) + return NULL; + + for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) + { + /* Problem getting some definition for this instruction. */ + if (ref_link->ref == NULL) + return NULL; + if (DF_REF_INSN_INFO (ref_link->ref) == NULL) + return NULL; + if (global_regs[REGNO (reg)] + && !set_of (reg, DF_REF_INSN (ref_link->ref))) + return NULL; + } + + if (ref_chain->next) + return NULL; + + rtx_insn *def = DF_REF_INSN (ref_chain->ref); + + if (BLOCK_FOR_INSN (def) != BLOCK_FOR_INSN (insn)) + return NULL; + + if (DF_INSN_LUID (def) > DF_INSN_LUID (insn)) + return NULL; + + return def; +} + +/* Get all uses of REG which is set in INSN. Return the use list or NULL if a + use is missing / irregular. If SUCCESS is not NULL then set it to false if + there are missing / irregular uses and true otherwise. */ +static struct df_link* +get_uses (rtx_insn *insn, rtx reg, bool *success) +{ + df_ref def; + struct df_link *ref_chain, *ref_link; + + if (success) + *success = false; + + FOR_EACH_INSN_DEF (def, insn) + if (REGNO (DF_REF_REG (def)) == REGNO (reg)) + break; + + if (!def) + return NULL; + + ref_chain = DF_REF_CHAIN (def); + + for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) + { + /* Problem getting a use for this instruction. */ + if (ref_link->ref == NULL) + return NULL; + if (DF_REF_CLASS (ref_link->ref) != DF_REF_REGULAR) + return NULL; + /* We do not handle REG_EQUIV/REG_EQ notes for now. */ + if (DF_REF_FLAGS (ref_link->ref) & DF_REF_IN_NOTE) + return NULL; + } + + if (success) + *success = true; + + return ref_chain; +} + +static HOST_WIDE_INT +fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns); + +/* Helper function for fold_offsets. + + If DO_RECURSION is false and ANALYZE is true this function returns true iff + it understands the structure of INSN and knows how to propagate constants + through it. In this case OFFSET_OUT and FOLDABLE_INSNS are unused. + + If DO_RECURSION is true then it also calls fold_offsets for each recognized + part of INSN with the appropriate arguments. + + If DO_RECURSION is true and ANALYZE is false then offset that would result + from folding is computed and is returned through the pointer OFFSET_OUT. + The instructions that can be folded are recorded in FOLDABLE_INSNS. +*/ +static bool +fold_offsets_1 (rtx_insn *insn, bool analyze, bool do_recursion, + HOST_WIDE_INT *offset_out, bitmap foldable_insns) +{ + /* Doesn't make sense if both DO_RECURSION and ANALYZE are false. 
*/ + gcc_checking_assert (do_recursion || analyze); + gcc_checking_assert (GET_CODE (PATTERN (insn)) == SET); + + rtx src = SET_SRC (PATTERN (insn)); + HOST_WIDE_INT offset = 0; + + switch (GET_CODE (src)) + { + case PLUS: + { + /* Propagate through add. */ + rtx arg1 = XEXP (src, 0); + rtx arg2 = XEXP (src, 1); + + if (REG_P (arg1)) + { + if (do_recursion) + offset += fold_offsets (insn, arg1, analyze, foldable_insns); + } + else if (GET_CODE (arg1) == ASHIFT + && REG_P (XEXP (arg1, 0)) + && CONST_INT_P (XEXP (arg1, 1))) + { + /* Handle R1 = (R2 << C) + ... */ + if (do_recursion) + { + HOST_WIDE_INT scale + = (HOST_WIDE_INT_1U << INTVAL (XEXP (arg1, 1))); + offset += scale * fold_offsets (insn, XEXP (arg1, 0), analyze, + foldable_insns); + } + } + else if (GET_CODE (arg1) == PLUS + && REG_P (XEXP (arg1, 0)) + && REG_P (XEXP (arg1, 1))) + { + /* Handle R1 = (R2 + R3) + ... */ + if (do_recursion) + { + offset += fold_offsets (insn, XEXP (arg1, 0), analyze, + foldable_insns); + offset += fold_offsets (insn, XEXP (arg1, 1), analyze, + foldable_insns); + } + } + else if (GET_CODE (arg1) == PLUS + && GET_CODE (XEXP (arg1, 0)) == ASHIFT + && REG_P (XEXP (XEXP (arg1, 0), 0)) + && CONST_INT_P (XEXP (XEXP (arg1, 0), 1)) + && REG_P (XEXP (arg1, 1))) + { + /* Handle R1 = ((R2 << C) + R3) + ... */ + if (do_recursion) + { + HOST_WIDE_INT scale + = (HOST_WIDE_INT_1U << INTVAL (XEXP (XEXP (arg1, 0), 1))); + offset += scale * fold_offsets (insn, XEXP (XEXP (arg1, 0), 0), + analyze, foldable_insns); + offset += fold_offsets (insn, XEXP (arg1, 1), analyze, + foldable_insns); + } + } + else + return false; + + if (REG_P (arg2)) + { + if (do_recursion) + offset += fold_offsets (insn, arg2, analyze, foldable_insns); + } + else if (CONST_INT_P (arg2)) + { + if (REG_P (arg1)) + { + offset += INTVAL (arg2); + /* This is a R1 = R2 + C instruction, candidate for folding. */ + if (!analyze) + bitmap_set_bit (foldable_insns, INSN_UID (insn)); + } + } + else + return false; + + /* Pattern recognized for folding. */ + break; + } + case MINUS: + { + /* Propagate through minus. */ + rtx arg1 = XEXP (src, 0); + rtx arg2 = XEXP (src, 1); + + if (REG_P (arg1)) + { + if (do_recursion) + offset += fold_offsets (insn, arg1, analyze, foldable_insns); + } + else + return false; + + if (REG_P (arg2)) + { + if (do_recursion) + offset -= fold_offsets (insn, arg2, analyze, foldable_insns); + } + else if (CONST_INT_P (arg2)) + { + if (REG_P (arg1)) + { + offset -= INTVAL (arg2); + /* This is a R1 = R2 - C instruction, candidate for folding. */ + if (!analyze) + bitmap_set_bit (foldable_insns, INSN_UID (insn)); + } + } + else + return false; + + /* Pattern recognized for folding. */ + break; + } + case NEG: + { + /* Propagate through negation. */ + rtx arg1 = XEXP (src, 0); + if (REG_P (arg1)) + { + if (do_recursion) + offset = -fold_offsets (insn, arg1, analyze, foldable_insns); + } + else + return false; + + /* Pattern recognized for folding. */ + break; + } + case MULT: + { + /* Propagate through multiply by constant. */ + rtx arg1 = XEXP (src, 0); + rtx arg2 = XEXP (src, 1); + + if (REG_P (arg1) && CONST_INT_P (arg2)) + { + if (do_recursion) + { + HOST_WIDE_INT scale = INTVAL (arg2); + offset = scale * fold_offsets (insn, arg1, analyze, + foldable_insns); + } + } + else + return false; + + /* Pattern recognized for folding. */ + break; + } + case ASHIFT: + { + /* Propagate through shift left by constant. 
*/ + rtx arg1 = XEXP (src, 0); + rtx arg2 = XEXP (src, 1); + + if (REG_P (arg1) && CONST_INT_P (arg2)) + { + if (do_recursion) + { + HOST_WIDE_INT scale = (HOST_WIDE_INT_1U << INTVAL (arg2)); + offset = scale * fold_offsets (insn, arg1, analyze, + foldable_insns); + } + } + else + return false; + + /* Pattern recognized for folding. */ + break; + } + case REG: + { + /* Propagate through register move. */ + if (do_recursion) + offset = fold_offsets (insn, src, analyze, foldable_insns); + + /* Pattern recognized for folding. */ + break; + } + case CONST_INT: + { + offset = INTVAL (src); + /* R1 = C is candidate for folding. */ + if (!analyze) + bitmap_set_bit (foldable_insns, INSN_UID (insn)); + + /* Pattern recognized for folding. */ + break; + } + default: + /* Cannot recognize. */ + return false; + } + + if (do_recursion && !analyze) + *offset_out = offset; + + return true; +} + +/* Function that computes the offset that would have to be added to all uses + of REG if the instructions marked in FOLDABLE_INSNS were to be eliminated. + + If ANALYZE is true then mark in CAN_FOLD_INSNS which instructions + transitively only affect other instructions found in CAN_FOLD_INSNS. + If ANALYZE is false then compute the offset required for folding. */ +static HOST_WIDE_INT +fold_offsets (rtx_insn *insn, rtx reg, bool analyze, bitmap foldable_insns) +{ + rtx_insn *def = get_single_def_in_bb (insn, reg); + + if (!def || GET_CODE (PATTERN (def)) != SET) + return 0; + + rtx dest = SET_DEST (PATTERN (def)); + + if (!REG_P (dest)) + return 0; + + /* We can only affect the values of GPR registers. */ + unsigned int dest_regno = REGNO (dest); + if (fixed_regs[dest_regno] + || !TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], dest_regno)) + return 0; + + if (analyze) + { + /* Check if we know how to handle DEF. */ + if (!fold_offsets_1 (def, true, false, NULL, NULL)) + return 0; + + /* We only fold through instructions that are transitively used as + memory addresses and do not have other uses. Use the same logic + from offset calculation to visit instructions that can propagate + offsets and keep track of them in CAN_FOLD_INSNS. */ + bool success; + struct df_link *uses = get_uses (def, dest, &success), *ref_link; + + if (!success) + return 0; + + for (ref_link = uses; ref_link; ref_link = ref_link->next) + { + rtx_insn *use = DF_REF_INSN (ref_link->ref); + + if (DEBUG_INSN_P (use)) + continue; + + /* Punt if the use is anything more complicated than a set + (clobber, use, etc). */ + if (!NONJUMP_INSN_P (use) || GET_CODE (PATTERN (use)) != SET) + return 0; + + /* This use affects instructions outside of CAN_FOLD_INSNS. */ + if (!bitmap_bit_p (&can_fold_insns, INSN_UID (use))) + return 0; + + rtx use_set = PATTERN (use); + + /* Special case: A foldable memory store is not foldable if it + mentions DEST outside of the address calculation. */ + if (use_set && MEM_P (SET_DEST (use_set)) + && reg_mentioned_p (dest, SET_SRC (use_set))) + return 0; + } + + bitmap_set_bit (&can_fold_insns, INSN_UID (def)); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Instruction marked for propagation: "); + print_rtl_single (dump_file, def); + } + } + else + { + /* We cannot propagate through this instruction. 
*/ + if (!bitmap_bit_p (&can_fold_insns, INSN_UID (def))) + return 0; + } + + HOST_WIDE_INT offset = 0; + bool recognized = fold_offsets_1 (def, analyze, true, &offset, + foldable_insns); + + if (!recognized) + return 0; + + return offset; +} + +/* Test if INSN is a memory load / store that can have an offset folded to it. + Return true iff INSN is such an instruction and return through MEM_OUT, + REG_OUT and OFFSET_OUT the RTX that has a MEM code, the register that is + used as a base address and the offset accordingly. + All of the out pointers may be NULL in which case they will be ignored. */ +bool +get_fold_mem_root (rtx_insn *insn, rtx *mem_out, rtx *reg_out, + HOST_WIDE_INT *offset_out) +{ + rtx set = single_set (insn); + rtx mem = NULL_RTX; + + if (set != NULL_RTX) + { + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + + /* Don't fold when we have unspec / volatile. */ + if (GET_CODE (src) == UNSPEC + || GET_CODE (src) == UNSPEC_VOLATILE + || GET_CODE (dest) == UNSPEC + || GET_CODE (dest) == UNSPEC_VOLATILE) + return false; + + if (MEM_P (src)) + mem = src; + else if (MEM_P (dest)) + mem = dest; + else if ((GET_CODE (src) == SIGN_EXTEND + || GET_CODE (src) == ZERO_EXTEND) + && MEM_P (XEXP (src, 0))) + mem = XEXP (src, 0); + } + + if (mem == NULL_RTX) + return false; + + rtx mem_addr = XEXP (mem, 0); + rtx reg; + HOST_WIDE_INT offset; + + if (REG_P (mem_addr)) + { + reg = mem_addr; + offset = 0; + } + else if (GET_CODE (mem_addr) == PLUS + && REG_P (XEXP (mem_addr, 0)) + && CONST_INT_P (XEXP (mem_addr, 1))) + { + reg = XEXP (mem_addr, 0); + offset = INTVAL (XEXP (mem_addr, 1)); + } + else + return false; + + if (mem_out) + *mem_out = mem; + if (reg_out) + *reg_out = reg; + if (offset_out) + *offset_out = offset; + + return true; +} + +/* If INSN is a root memory instruction then do a DFS traversal on its + definitions and find folding candidates. */ +static void +do_analysis (rtx_insn *insn) +{ + rtx reg; + if (!get_fold_mem_root (insn, NULL, ®, NULL)) + return; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Starting analysis from root: "); + print_rtl_single (dump_file, insn); + } + + /* Analyse folding opportunities for this memory instruction. */ + bitmap_set_bit (&can_fold_insns, INSN_UID (insn)); + fold_offsets (insn, reg, true, NULL); +} + +static void +do_fold_info_calculation (rtx_insn *insn, fold_info_map *fold_info) +{ + rtx mem, reg; + HOST_WIDE_INT cur_offset; + if (!get_fold_mem_root (insn, &mem, ®, &cur_offset)) + return; + + fold_mem_info *info = new fold_mem_info; + info->added_offset = fold_offsets (insn, reg, false, info->fold_insns); + + fold_info->put (insn, info); +} + +/* If INSN is a root memory instruction then compute a potentially new offset + for it and test if the resulting instruction is valid. */ +static void +do_check_validity (rtx_insn *insn, fold_mem_info *info) +{ + rtx mem, reg; + HOST_WIDE_INT cur_offset; + if (!get_fold_mem_root (insn, &mem, ®, &cur_offset)) + return; + + HOST_WIDE_INT new_offset = cur_offset + info->added_offset; + + /* Test if it is valid to change MEM's address offset to NEW_OFFSET. 
*/ + int icode = INSN_CODE (insn); + INSN_CODE (insn) = -1; + rtx mem_addr = XEXP (mem, 0); + machine_mode mode = GET_MODE (mem_addr); + if (new_offset != 0) + XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode)); + else + XEXP (mem, 0) = reg; + + bool illegal = insn_invalid_p (insn, false) + || !memory_address_addr_space_p (mode, XEXP (mem, 0), + MEM_ADDR_SPACE (mem)); + + /* Restore the instruction. */ + XEXP (mem, 0) = mem_addr; + INSN_CODE (insn) = icode; + + if (illegal) + bitmap_ior_into (&cannot_fold_insns, info->fold_insns); + else + bitmap_ior_into (&candidate_fold_insns, info->fold_insns); +} + +static bool +compute_validity_closure (fold_info_map *fold_info) +{ + /* Let's say we have an arbitrary chain of foldable instructions xN = xN + C + and memory operations rN that use xN as shown below. If folding x1 in r1 + turns out to be invalid for whatever reason then it's also invalid to fold + any of the other xN into any rN. That means that we need the transitive + closure of validity to determine whether we can fold a xN instruction. + + +--------------+ +-------------------+ +-------------------+ + | r1 = mem[x1] | | r2 = mem[x1 + x2] | | r3 = mem[x2 + x3] | ... + +--------------+ +-------------------+ +-------------------+ + ^ ^ ^ ^ ^ + | / | / | ... + | / | / | + +-------------+ / +-------------+ / +-------------+ + | x1 = x1 + 1 |-----+ | x2 = x2 + 1 |-----+ | x3 = x3 + 1 |--- ... + +-------------+ +-------------+ +-------------+ + ^ ^ ^ + | | | + ... ... ... + */ + + /* In general three iterations should be enough for most cases, but allow up + to five when -fexpensive-optimizations is used. */ + int max_iters = 3 + 2 * flag_expensive_optimizations; + for (int pass = 0; pass < max_iters; pass++) + { + bool made_changes = false; + for (fold_info_map::iterator iter = fold_info->begin (); + iter != fold_info->end (); ++iter) + { + fold_mem_info *info = (*iter).second; + if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns)) + made_changes |= bitmap_ior_into (&cannot_fold_insns, + info->fold_insns); + } + + if (!made_changes) + return true; + } + + return false; +} + +/* If INSN is a root memory instruction that was affected by any folding + then update its offset as necessary. */ +static void +do_commit_offset (rtx_insn *insn, fold_mem_info *info) +{ + rtx mem, reg; + HOST_WIDE_INT cur_offset; + if (!get_fold_mem_root (insn, &mem, ®, &cur_offset)) + return; + + HOST_WIDE_INT new_offset = cur_offset + info->added_offset; + + if (new_offset == cur_offset) + return; + + gcc_assert (!bitmap_empty_p (info->fold_insns)); + + if (bitmap_intersect_p (&cannot_fold_insns, info->fold_insns)) + return; + + if (dump_file) + { + fprintf (dump_file, "Memory offset changed from " + HOST_WIDE_INT_PRINT_DEC " to " HOST_WIDE_INT_PRINT_DEC + " for instruction:\n", cur_offset, new_offset); + print_rtl_single (dump_file, insn); + } + + machine_mode mode = GET_MODE (XEXP (mem, 0)); + if (new_offset != 0) + XEXP (mem, 0) = gen_rtx_PLUS (mode, reg, gen_int_mode (new_offset, mode)); + else + XEXP (mem, 0) = reg; + INSN_CODE (insn) = recog (PATTERN (insn), insn, 0); + df_insn_rescan (insn); +} + +/* If INSN is a move / add instruction that was folded then replace its + constant part with zero. 
*/ +static void +do_commit_insn (rtx_insn *insn) +{ + if (bitmap_bit_p (&candidate_fold_insns, INSN_UID (insn)) + && !bitmap_bit_p (&cannot_fold_insns, INSN_UID (insn))) + { + if (dump_file) + { + fprintf (dump_file, "Instruction folded:"); + print_rtl_single (dump_file, insn); + } + + stats_fold_count++; + + rtx set = single_set (insn); + rtx dest = SET_DEST (set); + rtx src = SET_SRC (set); + + /* Emit a move and let subsequent passes eliminate it if possible. */ + if (GET_CODE (src) == CONST_INT) + { + /* INSN is R1 = C. + Replace it with R1 = 0 because C was folded. */ + rtx mov_rtx + = gen_move_insn (dest, gen_int_mode (0, GET_MODE (dest))); + df_insn_rescan (emit_insn_after (mov_rtx, insn)); + } + else + { + /* INSN is R1 = R2 + C. + Replace it with R1 = R2 because C was folded. */ + rtx arg1 = XEXP (src, 0); + + /* If the DEST == ARG1 then the move is a no-op. */ + if (REGNO (dest) != REGNO (arg1)) + { + gcc_checking_assert (GET_MODE (dest) == GET_MODE (arg1)); + rtx mov_rtx = gen_move_insn (dest, arg1); + df_insn_rescan (emit_insn_after (mov_rtx, insn)); + } + } + + /* Delete the original move / add instruction. */ + delete_insn (insn); + } +} + +unsigned int +pass_fold_mem_offsets::execute (function *fn) +{ + df_set_flags (DF_EQ_NOTES + DF_RD_PRUNE_DEAD_DEFS + DF_DEFER_INSN_RESCAN); + df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); + df_analyze (); + + bitmap_initialize (&can_fold_insns, NULL); + bitmap_initialize (&candidate_fold_insns, NULL); + bitmap_initialize (&cannot_fold_insns, NULL); + + stats_fold_count = 0; + + basic_block bb; + rtx_insn *insn; + FOR_ALL_BB_FN (bb, fn) + { + /* There is a conflict between this pass and RISCV's shorten-memrefs + pass. For now disable folding if optimizing for size because + otherwise this cancels the effects of shorten-memrefs. */ + if (optimize_bb_for_size_p (bb)) + continue; + + fold_info_map fold_info; + + bitmap_clear (&can_fold_insns); + bitmap_clear (&candidate_fold_insns); + bitmap_clear (&cannot_fold_insns); + + FOR_BB_INSNS (bb, insn) + do_analysis (insn); + + FOR_BB_INSNS (bb, insn) + do_fold_info_calculation (insn, &fold_info); + + FOR_BB_INSNS (bb, insn) + if (fold_mem_info **info = fold_info.get (insn)) + do_check_validity (insn, *info); + + if (compute_validity_closure (&fold_info)) + { + FOR_BB_INSNS (bb, insn) + if (fold_mem_info **info = fold_info.get (insn)) + do_commit_offset (insn, *info); + + FOR_BB_INSNS (bb, insn) + do_commit_insn (insn); + } + + for (fold_info_map::iterator iter = fold_info.begin (); + iter != fold_info.end (); ++iter) + delete (*iter).second; + } + + statistics_counter_event (cfun, "Number of folded instructions", + stats_fold_count); + + bitmap_release (&can_fold_insns); + bitmap_release (&candidate_fold_insns); + bitmap_release (&cannot_fold_insns); + + return 0; +} + +} // anon namespace + +rtl_opt_pass * +make_pass_fold_mem_offsets (gcc::context *ctxt) +{ + return new pass_fold_mem_offsets (ctxt); +} diff --git a/gcc/passes.def b/gcc/passes.def index 2bafd60..df7965d 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -519,6 +519,7 @@ along with GCC; see the file COPYING3. 
If not see
       NEXT_PASS (pass_peephole2);
       NEXT_PASS (pass_if_after_reload);
       NEXT_PASS (pass_regrename);
+      NEXT_PASS (pass_fold_mem_offsets);
       NEXT_PASS (pass_cprop_hardreg);
       NEXT_PASS (pass_fast_rtl_dce);
       NEXT_PASS (pass_reorder_blocks);
diff --git a/gcc/testsuite/gcc.target/i386/pr52146.c b/gcc/testsuite/gcc.target/i386/pr52146.c
index 9bd8136..03e3e95 100644
--- a/gcc/testsuite/gcc.target/i386/pr52146.c
+++ b/gcc/testsuite/gcc.target/i386/pr52146.c
@@ -16,4 +16,4 @@ test2 (void)
   *apic_tpr_addr = 0;
 }
 
-/* { dg-final { scan-assembler-not "\[,\\t \]+-18874240" } } */
+/* { dg-final { scan-assembler-not "\[,\\t \]+-18874240\n" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-1.c b/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-1.c
new file mode 100644
index 0000000..ffb4993
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffold-mem-offsets" } */
+
+void sink(int arr[2]);
+
+void
+foo(int a, int b, int i)
+{
+  int arr[2] = {a, b};
+  arr[i]++;
+  sink(arr);
+}
+
+/* There should be no negative memory offsets when using -ffold-mem-offsets. */
+/* { dg-final { scan-assembler-not "lw\t.*,-.*\\(.*\\)" } } */
+/* { dg-final { scan-assembler-not "sw\t.*,-.*\\(.*\\)" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-2.c b/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-2.c
new file mode 100644
index 0000000..ca96180
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffold-mem-offsets" } */
+
+void sink(int arr[3]);
+
+void
+foo(int a, int b, int c, int i)
+{
+  int arr1[3] = {a, b, c};
+  int arr2[3] = {a, c, b};
+  int arr3[3] = {c, b, a};
+
+  arr1[i]++;
+  arr2[i]++;
+  arr3[i]++;
+
+  sink(arr1);
+  sink(arr2);
+  sink(arr3);
+}
+
+/* There should be no negative memory offsets when using -ffold-mem-offsets. */
+/* { dg-final { scan-assembler-not "lw\t.*,-.*\\(.*\\)" } } */
+/* { dg-final { scan-assembler-not "sw\t.*,-.*\\(.*\\)" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-3.c b/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-3.c
new file mode 100644
index 0000000..83f82c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/fold-mem-offsets-3.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffold-mem-offsets" } */
+
+void load(int arr[2]);
+
+int
+foo(long unsigned int i)
+{
+  int arr[2];
+  load(arr);
+
+  return arr[3 * i + 77];
+}
+
+/* There should be no negative memory offsets when using -ffold-mem-offsets. */
+/* { dg-final { scan-assembler-not "lw\t.*,-.*\\(.*\\)" } } */
+/* { dg-final { scan-assembler-not "addi\t.*,.*,77" } } */
\ No newline at end of file
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 9c4b1e4..79a5f33 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -622,6 +622,7 @@ extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_fold_mem_offsets (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_cprop_hardreg (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_reorder_blocks (gcc::context *ctxt);
 extern rtl_opt_pass *make_pass_leaf_regs (gcc::context *ctxt);
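As a closing usage note: the option is on by default at -O2 and above (common.opt initialises the flag to 1 and the pass gate additionally requires optimize >= 2), and it can be disabled with -fno-fold-mem-offsets. The sketch below, modelled on fold-mem-offsets-3.c, shows a constant being folded through scaled index arithmetic; the assembly in the comment is illustrative only (register names and the exact instruction selection are assumptions).

/* Illustrative only: arr[3 * i + 77] addresses base + 4*(3*i) + 308, so
   the +77 can migrate into the load's displacement instead of being
   materialised by an addi.  */
void load (int arr[2]);

int
foo (long unsigned int i)
{
  int arr[2];
  load (arr);
  return arr[3 * i + 77];
}

/* Hypothetical before/after for the return's load, assuming a5 holds 3*i
   and the array lives at sp:

     before:  addi a5,a5,77          after:  slli a5,a5,2
              slli a5,a5,2                   add  a5,a5,sp
              add  a5,a5,sp                  lw   a0,308(a5)
              lw   a0,0(a5)

   The 77 is scaled through the slli (77 << 2 == 308) into the lw offset
   and the addi disappears, which is what the new test's scan-assembler-not
   patterns check for.  */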