aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/rs6000/rs6000.c11
-rw-r--r--gcc/doc/tm.texi9
-rw-r--r--gcc/doc/tm.texi.in2
-rw-r--r--gcc/target.def13
-rw-r--r--gcc/targhooks.c8
-rw-r--r--gcc/targhooks.h1
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr61837.c20
-rw-r--r--gcc/tree-ssa-loop-ivopts.c69
8 files changed, 131 insertions, 2 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 279f00c..2de5a96 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1700,6 +1700,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
+#undef TARGET_PREFERRED_DOLOOP_MODE
+#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
+
#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
@@ -27935,6 +27938,14 @@ rs6000_predict_doloop_p (struct loop *loop)
return true;
}
+/* Implement TARGET_PREFERRED_DOLOOP_MODE. The incoming mode is ignored: word_mode is always preferred. */
+
+static machine_mode
+rs6000_preferred_doloop_mode (machine_mode)
+{
+ return word_mode;
+}
+
/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
static bool
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c8f4abe..d5f8852 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12011,6 +12011,15 @@ By default, the RTL loop optimizer does not use a present doloop pattern for
loops containing function calls or branch on table instructions.
@end deftypefn
+@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE (machine_mode @var{mode})
+This hook takes a @var{mode} for a doloop IV, where @code{mode} is the
+original mode for the operation. If the target prefers an alternate
+@code{mode} for the operation, then this hook should return that mode;
+otherwise the original @code{mode} should be returned. For example, on a
+64-bit target, @code{DImode} might be preferred over @code{SImode}. Both the
+original and the returned modes should be @code{MODE_INT}.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn *@var{insn})
Take an instruction in @var{insn} and return @code{false} if the instruction
is not appropriate as a combination of two or more instructions. The
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 9c4b501..0b60342 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7943,6 +7943,8 @@ to by @var{ce_info}.
@hook TARGET_INVALID_WITHIN_DOLOOP
+@hook TARGET_PREFERRED_DOLOOP_MODE
+
@hook TARGET_LEGITIMATE_COMBINED_INSN
@hook TARGET_CAN_FOLLOW_JUMP
diff --git a/gcc/target.def b/gcc/target.def
index 2e40448..94e45a9 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4455,6 +4455,19 @@ loops containing function calls or branch on table instructions.",
const char *, (const rtx_insn *insn),
default_invalid_within_doloop)
+/* Returns the machine mode which the target prefers for doloop IV. */
+DEFHOOK
+(preferred_doloop_mode,
+"This hook takes a @var{mode} for a doloop IV, where @code{mode} is the\n\
+original mode for the operation. If the target prefers an alternate\n\
+@code{mode} for the operation, then this hook should return that mode;\n\
+otherwise the original @code{mode} should be returned. For example, on a\n\
+64-bit target, @code{DImode} might be preferred over @code{SImode}. Both the\n\
+original and the returned modes should be @code{MODE_INT}.",
+ machine_mode,
+ (machine_mode mode),
+ default_preferred_doloop_mode)
+
/* Returns true for a legitimate combined insn. */
DEFHOOK
(legitimate_combined_insn,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 44a1fac..eb51909 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop ATTRIBUTE_UNUSED)
return false;
}
+/* Default implementation of the preferred_doloop_mode hook: return the input MODE unchanged. */
+
+machine_mode
+default_preferred_doloop_mode (machine_mode mode)
+{
+ return mode;
+}
+
/* NULL if INSN insn is valid within a low-overhead loop, otherwise returns
an error message.
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index f70a307d..f92e102 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -88,6 +88,7 @@ extern bool default_fixed_point_supported_p (void);
extern bool default_has_ifunc_p (void);
extern bool default_predict_doloop_p (class loop *);
+extern machine_mode default_preferred_doloop_mode (machine_mode);
extern const char * default_invalid_within_doloop (const rtx_insn *);
extern tree default_builtin_vectorized_function (unsigned int, tree, tree);
diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c b/gcc/testsuite/gcc.target/powerpc/pr61837.c
new file mode 100644
index 0000000..e5a0c4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_doloop -fno-unroll-loops" } */
+/* The inner loop should use the doloop IV in word_mode, so that
+ there is no need to access it through zero_extend on a shorter mode. */
+void foo(int *p1, long *p2, int s)
+{
+ int n, v, i;
+
+ v = 0;
+ for (n = 0; n <= 100; n++) {
+ for (i = 0; i < s; i++)
+ if (p2[i] == n)
+ p1[i] = v;
+ v += 88;
+ }
+}
+
+/* { dg-final {scan-rtl-dump-not {(?p)zero_extend.*doloop} "loop2_doloop"} } */
+/* { dg-final {scan-rtl-dump-not {(?p)reg:SI.*doloop} "loop2_doloop" { target lp64 } } } */
+
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 12a8a49..47b867f 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -5657,6 +5657,59 @@ relate_compare_use_with_all_cands (struct ivopts_data *data)
}
}
+/* Compute the doloop iv base (NITER + 1). Use the unsigned type of PREFERRED_MODE when it exists and,
+ given ITERATIONS_MAX, can represent the value; otherwise build the base in the type of NITER. */
+
+static tree
+compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
+ const widest_int &iterations_max)
+{
+ tree ntype = TREE_TYPE (niter);
+ tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
+ if (!pref_type)
+ return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+ build_int_cst (ntype, 1));
+
+ gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
+
+ int prec = TYPE_PRECISION (ntype);
+ int pref_prec = TYPE_PRECISION (pref_type);
+
+ tree base;
+
+ /* Check whether PREFERRED_MODE is able to represent niter. */
+ if (pref_prec > prec
+ || wi::ltu_p (iterations_max,
+ widest_int::from (wi::max_value (pref_prec, UNSIGNED),
+ UNSIGNED)))
+ {
+ /* niter + 1 cannot wrap in the original type, so convert to the preferred type after the addition. */
+ if (wi::ltu_p (iterations_max,
+ widest_int::from (wi::max_value (prec, UNSIGNED),
+ UNSIGNED)))
+ {
+ /* This could help to optimize a "-1 +1" pair when niter looks
+ like "n-1", where n is in the original mode: "base = (n - 1) + 1"
+ in PREFERRED_MODE could then become base = (PREFERRED_TYPE)n. */
+ base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+ build_int_cst (ntype, 1));
+ base = fold_convert (pref_type, base);
+ }
+
+ /* To avoid wrap, convert niter to the preferred type before adding 1. */
+ else
+ {
+ niter = fold_convert (pref_type, niter);
+ base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
+ build_int_cst (pref_type, 1));
+ }
+ }
+ else
+ base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+ build_int_cst (ntype, 1));
+ return base;
+}
+
/* Add one doloop dedicated IV candidate:
- Base is (may_be_zero ? 1 : (niter + 1)).
- Step is -1. */
@@ -5688,8 +5741,20 @@ add_iv_candidate_for_doloop (struct ivopts_data *data)
return;
}
- tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
- build_int_cst (ntype, 1));
+ machine_mode mode = TYPE_MODE (ntype);
+ machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
+
+ tree base;
+ if (mode != pref_mode)
+ {
+ base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
+ ntype = TREE_TYPE (base);
+ }
+ else
+ base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
+ build_int_cst (ntype, 1));
+
+
add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
}