diff options
author | Jiufu Guo <guojiufu@linux.ibm.com> | 2021-07-15 17:21:00 +0800 |
---|---|---|
committer | Jiufu Guo <guojiufu@linux.ibm.com> | 2021-07-29 08:42:56 +0800 |
commit | aafa38b5bfed5e3eff258aa5354ed928f4986709 (patch) | |
tree | aa79d882438826d124bc09c9c61168a939e15f8d /gcc | |
parent | 3916902930769d5172c0feaa5f535ca7b2bafdf7 (diff) | |
download | gcc-aafa38b5bfed5e3eff258aa5354ed928f4986709.zip gcc-aafa38b5bfed5e3eff258aa5354ed928f4986709.tar.gz gcc-aafa38b5bfed5e3eff258aa5354ed928f4986709.tar.bz2 |
Use preferred mode for doloop IV [PR61837]
Currently, the doloop.xx variable uses the same type as niter, which may be
narrower than the word size. For some targets, it would be better to use
a word-sized type. For example, on a 64-bit system, a subreg may be used
to access a 32-bit value. Using a 64-bit type may therefore be better for
niter if its value can be represented in both 32 and 64 bits.
This patch adds a target hook to query the preferred mode for the doloop IV,
and updates the mode accordingly.
gcc/ChangeLog:
2021-07-29 Jiufu Guo <guojiufu@linux.ibm.com>
PR target/61837
* config/rs6000/rs6000.c (TARGET_PREFERRED_DOLOOP_MODE): New hook.
(rs6000_preferred_doloop_mode): New hook.
* doc/tm.texi: Regenerate.
* doc/tm.texi.in: Add hook preferred_doloop_mode.
* target.def (preferred_doloop_mode): New hook.
* targhooks.c (default_preferred_doloop_mode): New hook.
* targhooks.h (default_preferred_doloop_mode): New hook.
* tree-ssa-loop-ivopts.c (compute_doloop_base_on_mode): New function.
(add_iv_candidate_for_doloop): Call targetm.preferred_doloop_mode
and compute_doloop_base_on_mode.
gcc/testsuite/ChangeLog:
2021-07-29 Jiufu Guo <guojiufu@linux.ibm.com>
PR target/61837
* gcc.target/powerpc/pr61837.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 11 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 9 | ||||
-rw-r--r-- | gcc/doc/tm.texi.in | 2 | ||||
-rw-r--r-- | gcc/target.def | 13 | ||||
-rw-r--r-- | gcc/targhooks.c | 8 | ||||
-rw-r--r-- | gcc/targhooks.h | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/pr61837.c | 20 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 69 |
8 files changed, 131 insertions, 2 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 279f00c..2de5a96 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1700,6 +1700,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_DOLOOP_COST_FOR_ADDRESS #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000 +#undef TARGET_PREFERRED_DOLOOP_MODE +#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv @@ -27935,6 +27938,14 @@ rs6000_predict_doloop_p (struct loop *loop) return true; } +/* Implement TARGET_PREFERRED_DOLOOP_MODE. */ + +static machine_mode +rs6000_preferred_doloop_mode (machine_mode) +{ + return word_mode; +} + /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */ static bool diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index c8f4abe..d5f8852 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -12011,6 +12011,15 @@ By default, the RTL loop optimizer does not use a present doloop pattern for loops containing function calls or branch on table instructions. @end deftypefn +@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE (machine_mode @var{mode}) +This hook takes a @var{mode} for a doloop IV, where @code{mode} is the +original mode for the operation. If the target prefers an alternate +@code{mode} for the operation, then this hook should return that mode; +otherwise the original @code{mode} should be returned. For example, on a +64-bit target, @code{DImode} might be preferred over @code{SImode}. Both the +original and the returned modes should be @code{MODE_INT}. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn *@var{insn}) Take an instruction in @var{insn} and return @code{false} if the instruction is not appropriate as a combination of two or more instructions. 
The diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 9c4b501..0b60342 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7943,6 +7943,8 @@ to by @var{ce_info}. @hook TARGET_INVALID_WITHIN_DOLOOP +@hook TARGET_PREFERRED_DOLOOP_MODE + @hook TARGET_LEGITIMATE_COMBINED_INSN @hook TARGET_CAN_FOLLOW_JUMP diff --git a/gcc/target.def b/gcc/target.def index 2e40448..94e45a9 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4455,6 +4455,19 @@ loops containing function calls or branch on table instructions.", const char *, (const rtx_insn *insn), default_invalid_within_doloop) +/* Returns the machine mode which the target prefers for doloop IV. */ +DEFHOOK +(preferred_doloop_mode, +"This hook takes a @var{mode} for a doloop IV, where @code{mode} is the\n\ +original mode for the operation. If the target prefers an alternate\n\ +@code{mode} for the operation, then this hook should return that mode;\n\ +otherwise the original @code{mode} should be returned. For example, on a\n\ +64-bit target, @code{DImode} might be preferred over @code{SImode}. Both the\n\ +original and the returned modes should be @code{MODE_INT}.", + machine_mode, + (machine_mode mode), + default_preferred_doloop_mode) + /* Returns true for a legitimate combined insn. */ DEFHOOK (legitimate_combined_insn, diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 44a1fac..eb51909 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop ATTRIBUTE_UNUSED) return false; } +/* By default, just use the input MODE itself. */ + +machine_mode +default_preferred_doloop_mode (machine_mode mode) +{ + return mode; +} + /* NULL if INSN insn is valid within a low-overhead loop, otherwise returns an error message. 
diff --git a/gcc/targhooks.h b/gcc/targhooks.h index f70a307d..f92e102 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -88,6 +88,7 @@ extern bool default_fixed_point_supported_p (void); extern bool default_has_ifunc_p (void); extern bool default_predict_doloop_p (class loop *); +extern machine_mode default_preferred_doloop_mode (machine_mode); extern const char * default_invalid_within_doloop (const rtx_insn *); extern tree default_builtin_vectorized_function (unsigned int, tree, tree); diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c b/gcc/testsuite/gcc.target/powerpc/pr61837.c new file mode 100644 index 0000000..e5a0c4f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-loop2_doloop -fno-unroll-loops" } */ +/* The inner loop would use the doloop IV in word_mode. And then + there is no need to access it though zero_extend on shorter mode. */ +void foo(int *p1, long *p2, int s) +{ + int n, v, i; + + v = 0; + for (n = 0; n <= 100; n++) { + for (i = 0; i < s; i++) + if (p2[i] == n) + p1[i] = v; + v += 88; + } +} + +/* { dg-final {scan-rtl-dump-not {(?p)zero_extend.*doloop} "loop2_doloop"} } */ +/* { dg-final {scan-rtl-dump-not {(?p)reg:SI.*doloop} "loop2_doloop" { target lp64 } } } */ + diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 12a8a49..47b867f 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -5657,6 +5657,59 @@ relate_compare_use_with_all_cands (struct ivopts_data *data) } } +/* If PREFERRED_MODE is suitable and profitable, use the preferred + PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. 
*/ + +static tree +compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter, + const widest_int &iterations_max) +{ + tree ntype = TREE_TYPE (niter); + tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1); + if (!pref_type) + return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + + gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE); + + int prec = TYPE_PRECISION (ntype); + int pref_prec = TYPE_PRECISION (pref_type); + + tree base; + + /* Check if the PREFERRED_MODED is able to present niter. */ + if (pref_prec > prec + || wi::ltu_p (iterations_max, + widest_int::from (wi::max_value (pref_prec, UNSIGNED), + UNSIGNED))) + { + /* No wrap, it is safe to use preferred type after niter + 1. */ + if (wi::ltu_p (iterations_max, + widest_int::from (wi::max_value (prec, UNSIGNED), + UNSIGNED))) + { + /* This could help to optimize "-1 +1" pair when niter looks + like "n-1": n is in original mode. "base = (n - 1) + 1" + in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */ + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + base = fold_convert (pref_type, base); + } + + /* To avoid wrap, convert niter to preferred type before plus 1. */ + else + { + niter = fold_convert (pref_type, niter); + base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter), + build_int_cst (pref_type, 1)); + } + } + else + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + return base; +} + /* Add one doloop dedicated IV candidate: - Base is (may_be_zero ? 1 : (niter + 1)). - Step is -1. 
*/ @@ -5688,8 +5741,20 @@ add_iv_candidate_for_doloop (struct ivopts_data *data) return; } - tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), - build_int_cst (ntype, 1)); + machine_mode mode = TYPE_MODE (ntype); + machine_mode pref_mode = targetm.preferred_doloop_mode (mode); + + tree base; + if (mode != pref_mode) + { + base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max); + ntype = TREE_TYPE (base); + } + else + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter), + build_int_cst (ntype, 1)); + + add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true); } |