diff options
author | Richard Biener <rguenther@suse.de> | 2019-05-23 11:35:16 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2019-05-23 11:35:16 +0000 |
commit | 5879ab5fafedc8f6f9bfe95a4cf8501b0df90edd (patch) | |
tree | e886bbae2a64b230962aaf93a1c7ceb9b88ece6e /gcc/tree-loop-distribution.c | |
parent | 32d941139e8144662e0dc182d64e3f3f9df89ad4 (diff) | |
download | gcc-5879ab5fafedc8f6f9bfe95a4cf8501b0df90edd.zip gcc-5879ab5fafedc8f6f9bfe95a4cf8501b0df90edd.tar.gz gcc-5879ab5fafedc8f6f9bfe95a4cf8501b0df90edd.tar.bz2 |
re PR tree-optimization/88440 (size optimization of memcpy-like code)
2019-05-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/88440
* opts.c (default_options_table): Enable -ftree-loop-distribute-patterns
at -O[2s]+.
* tree-loop-distribution.c (generate_memset_builtin): Fold the
generated call.
(generate_memcpy_builtin): Likewise.
(distribute_loop): Pass in whether to only distribute patterns.
(prepare_perfect_loop_nest): Also allow size optimization.
(pass_loop_distribution::execute): When optimizing a loop
nest for size allow pattern replacement.
* gcc.dg/tree-ssa/ldist-37.c: New testcase.
* gcc.dg/tree-ssa/ldist-38.c: Likewise.
* gcc.dg/vect/vect.exp: Add -fno-tree-loop-distribute-patterns.
* gcc.dg/tree-ssa/ldist-37.c: Adjust.
* gcc.dg/tree-ssa/ldist-38.c: Likewise.
* g++.dg/tree-ssa/pr78847.C: Likewise.
* gcc.dg/autopar/pr39500-1.c: Likewise.
* gcc.dg/autopar/reduc-1char.c: Likewise.
* gcc.dg/autopar/reduc-7.c: Likewise.
* gcc.dg/tree-ssa/ivopts-lt-2.c: Likewise.
* gcc.dg/tree-ssa/ivopts-lt.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-1.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-2.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-3.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-4.c: Likewise.
* gcc.dg/tree-ssa/prefetch-7.c: Likewise.
* gcc.dg/tree-ssa/prefetch-8.c: Likewise.
* gcc.dg/tree-ssa/prefetch-9.c: Likewise.
* gcc.dg/tree-ssa/scev-11.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise.
* gcc.target/i386/pr30970.c: Likewise.
* gcc.target/i386/vect-double-1.c: Likewise.
* gcc.target/i386/vect-double-2.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-2.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-26.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-32.c: Likewise.
* gfortran.dg/vect/vect-5.f90: Likewise.
* gfortran.dg/vect/vect-8.f90: Likewise.
From-SVN: r271553
Diffstat (limited to 'gcc/tree-loop-distribution.c')
-rw-r--r-- | gcc/tree-loop-distribution.c | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index 3d8f13c..5f03b87 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -115,6 +115,7 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "tree-vectorizer.h" #include "tree-eh.h" +#include "gimple-fold.h" #define MAX_DATAREFS_NUM \ @@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *loop, partition *partition) fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); + fold_stmt (&gsi); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *loop, partition *partition) fn = build_fold_addr_expr (builtin_decl_implicit (kind)); fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); + fold_stmt (&gsi); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions, static int distribute_loop (struct loop *loop, vec<gimple *> stmts, - control_dependences *cd, int *nb_calls, bool *destroy_p) + control_dependences *cd, int *nb_calls, bool *destroy_p, + bool only_patterns_p) { ddrs_table = new hash_table<ddr_hasher> (389); struct graph *rdg; @@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, /* If we are only distributing patterns but did not detect any, simply bail out. */ - if (!flag_tree_loop_distribution + if (only_patterns_p && !any_builtin) { nbp = 0; @@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, a loop into pieces, separated by builtin calls. That is, we only want no or a single loop body remaining. */ struct partition *into; - if (!flag_tree_loop_distribution) + if (only_patterns_p) { for (i = 0; partitions.iterate (i, &into); ++i) if (!partition_builtin_p (into)) @@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop *loop) && loop_outer (outer) && outer->inner == loop && loop->next == NULL && single_exit (outer) - && optimize_loop_for_speed_p (outer) && !chrec_contains_symbols_defined_in_loop (niters, outer->num) && (niters = number_of_latch_executions (outer)) != NULL_TREE && niters != chrec_dont_know) @@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (function *fun) walking to innermost loops. */ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) { - /* Don't distribute multiple exit edges loop, or cold loop. */ + /* Don't distribute multiple exit edges loop, or cold loop when + not doing pattern detection. */ if (!single_exit (loop) - || !optimize_loop_for_speed_p (loop)) + || (!flag_tree_loop_distribute_patterns + && !optimize_loop_for_speed_p (loop))) continue; /* Don't distribute loop if niters is unknown. */ @@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (function *fun) bool destroy_p; int nb_generated_loops, nb_generated_calls; - nb_generated_loops = distribute_loop (loop, work_list, cd, - &nb_generated_calls, - &destroy_p); + nb_generated_loops + = distribute_loop (loop, work_list, cd, &nb_generated_calls, + &destroy_p, (!optimize_loop_for_speed_p (loop) + || !flag_tree_loop_distribution)); if (destroy_p) loops_to_be_destroyed.safe_push (loop); |