diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 11 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 9 | ||||
-rw-r--r-- | gcc/doc/tm.texi.in | 2 | ||||
-rw-r--r-- | gcc/target.def | 13 | ||||
-rw-r--r-- | gcc/targhooks.c | 8 | ||||
-rw-r--r-- | gcc/targhooks.h | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/pr61837.c | 20 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 69 |
8 files changed, 131 insertions, 2 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 279f00c..2de5a96 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1700,6 +1700,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_DOLOOP_COST_FOR_ADDRESS #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000 +#undef TARGET_PREFERRED_DOLOOP_MODE +#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv @@ -27935,6 +27938,14 @@ rs6000_predict_doloop_p (struct loop *loop) return true; } +/* Implement TARGET_PREFERRED_DOLOOP_MODE. */ + +static machine_mode +rs6000_preferred_doloop_mode (machine_mode) +{ + return word_mode; +} + /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */ static bool diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c8f4abe..d5f8852 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -12011,6 +12011,15 @@ By default, the RTL loop optimizer does not use a present doloop pattern for loops containing function calls or branch on table instructions. @end deftypefn +@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE (machine_mode @var{mode}) +This hook takes a @var{mode} for a doloop IV, where @code{mode} is the +original mode for the operation. If the target prefers an alternate +@code{mode} for the operation, then this hook should return that mode; +otherwise the original @code{mode} should be returned. For example, on a +64-bit target, @code{DImode} might be preferred over @code{SImode}. Both the +original and the returned modes should be @code{MODE_INT}. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn *@var{insn}) Take an instruction in @var{insn} and return @code{false} if the instruction is not appropriate as a combination of two or more instructions. 
The diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 9c4b501..0b60342 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7943,6 +7943,8 @@ to by @var{ce_info}. @hook TARGET_INVALID_WITHIN_DOLOOP +@hook TARGET_PREFERRED_DOLOOP_MODE + @hook TARGET_LEGITIMATE_COMBINED_INSN @hook TARGET_CAN_FOLLOW_JUMP diff --git a/gcc/target.def b/gcc/target.def index 2e40448..94e45a9 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4455,6 +4455,19 @@ loops containing function calls or branch on table instructions.", const char *, (const rtx_insn *insn), default_invalid_within_doloop) +/* Returns the machine mode which the target prefers for doloop IV. */ +DEFHOOK +(preferred_doloop_mode, +"This hook takes a @var{mode} for a doloop IV, where @code{mode} is the\n\ +original mode for the operation. If the target prefers an alternate\n\ +@code{mode} for the operation, then this hook should return that mode;\n\ +otherwise the original @code{mode} should be returned. For example, on a\n\ +64-bit target, @code{DImode} might be preferred over @code{SImode}. Both the\n\ +original and the returned modes should be @code{MODE_INT}.", + machine_mode, + (machine_mode mode), + default_preferred_doloop_mode) + /* Returns true for a legitimate combined insn. */ DEFHOOK (legitimate_combined_insn, diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 44a1fac..eb51909 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop ATTRIBUTE_UNUSED) return false; } +/* By default, just use the input MODE itself. */ + +machine_mode +default_preferred_doloop_mode (machine_mode mode) +{ + return mode; +} + /* NULL if INSN insn is valid within a low-overhead loop, otherwise returns an error message. 
diff --git a/gcc/targhooks.h b/gcc/targhooks.h index f70a307d..f92e102 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -88,6 +88,7 @@ extern bool default_fixed_point_supported_p (void); extern bool default_has_ifunc_p (void); extern bool default_predict_doloop_p (class loop *); +extern machine_mode default_preferred_doloop_mode (machine_mode); extern const char * default_invalid_within_doloop (const rtx_insn *); extern tree default_builtin_vectorized_function (unsigned int, tree, tree); diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c b/gcc/testsuite/gcc.target/powerpc/pr61837.c new file mode 100644 index 0000000..e5a0c4f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-loop2_doloop -fno-unroll-loops" } */ +/* The inner loop would use the doloop IV in word_mode. And then + there is no need to access it through zero_extend on shorter mode. */ +void foo(int *p1, long *p2, int s) +{ + int n, v, i; + + v = 0; + for (n = 0; n <= 100; n++) { + for (i = 0; i < s; i++) + if (p2[i] == n) + p1[i] = v; + v += 88; + } +} + +/* { dg-final {scan-rtl-dump-not {(?p)zero_extend.*doloop} "loop2_doloop"} } */ +/* { dg-final {scan-rtl-dump-not {(?p)reg:SI.*doloop} "loop2_doloop" { target lp64 } } } */ + diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 12a8a49..47b867f 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -5657,6 +5657,59 @@ relate_compare_use_with_all_cands (struct ivopts_data *data) } } +/* If PREFERRED_MODE is suitable and profitable, use + PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. 
*/ + +static tree +compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter, + const widest_int &iterations_max) +{ + tree ntype = TREE_TYPE (niter); + tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1); + if (!pref_type) + return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + + gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE); + + int prec = TYPE_PRECISION (ntype); + int pref_prec = TYPE_PRECISION (pref_type); + + tree base; + + /* Check if the PREFERRED_MODE is able to represent niter. */ + if (pref_prec > prec + || wi::ltu_p (iterations_max, + widest_int::from (wi::max_value (pref_prec, UNSIGNED), + UNSIGNED))) + { + /* No wrap, it is safe to use preferred type after niter + 1. */ + if (wi::ltu_p (iterations_max, + widest_int::from (wi::max_value (prec, UNSIGNED), + UNSIGNED))) + { + /* This could help to optimize "-1 +1" pair when niter looks + like "n-1": n is in original mode. "base = (n - 1) + 1" + in PREFERRED_MODE: it could be base = (PREFERRED_TYPE)n. */ + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + base = fold_convert (pref_type, base); + } + + /* To avoid wrap, convert niter to preferred type before plus 1. */ + else + { + niter = fold_convert (pref_type, niter); + base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter), + build_int_cst (pref_type, 1)); + } + } + else + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + return base; +} + /* Add one doloop dedicated IV candidate: - Base is (may_be_zero ? 1 : (niter + 1)). - Step is -1. 
*/ @@ -5688,8 +5741,20 @@ add_iv_candidate_for_doloop (struct ivopts_data *data) return; } - tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), - build_int_cst (ntype, 1)); + machine_mode mode = TYPE_MODE (ntype); + machine_mode pref_mode = targetm.preferred_doloop_mode (mode); + + tree base; + if (mode != pref_mode) + { + base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max); + ntype = TREE_TYPE (base); + } + else + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + + add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true); } |