aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorHans-Peter Nilsson <hp@axis.com>2020-08-24 03:15:21 +0200
committerHans-Peter Nilsson <hp@axis.com>2020-08-24 03:15:21 +0200
commit0e6c51de8ec47bf5f0dfaabfd1898c722d0485b4 (patch)
treec226f91c199bb1f810f865455c5acebd873855d5 /gcc
parentbaf917af016236878b1246c7412735e8e831bf16 (diff)
downloadgcc-0e6c51de8ec47bf5f0dfaabfd1898c722d0485b4.zip
gcc-0e6c51de8ec47bf5f0dfaabfd1898c722d0485b4.tar.gz
gcc-0e6c51de8ec47bf5f0dfaabfd1898c722d0485b4.tar.bz2
reorg.c (fill_slots_from_thread): Improve for TARGET_FLAGS_REGNUM
This handles TARGET_FLAGS_REGNUM clobbering insns as delay-slot fillers using a method similar to that in commit 33c2207d3fda, where care was taken for fill_simple_delay_slots to allow such insns when scanning for delay-slot fillers *backwards* (before the insn). A TARGET_FLAGS_REGNUM target is typically a former cc0 target. For cc0 targets, insns don't mention clobbering cc0, so the clobbers are mentioned in the "resources" only as a special entity and only for compare-insns and branches, where the cc0 value matters. In contrast, with TARGET_FLAGS_REGNUM, most insns clobber it and the register liveness detection in reorg.c / resource.c treats that as a blocker (for other insns mentioning it, i.e. most) when looking for delay-slot-filling candidates. This means that when comparing code and performance for a delay-slot cc0 target before and after the de-cc0 conversion, the inability to fill a delay slot after conversion manifests as a regression. This was one such case, for CRIS, with random_bitstring in gcc.c-torture/execute/arith-rand-ll.c as well as the target libgcc division function. After this, all known performance regressions compared to cc0 are fixed. gcc: PR target/93372 * reorg.c (fill_slots_from_thread): Allow trial insns that clobber TARGET_FLAGS_REGNUM as delay-slot fillers. gcc/testsuite: PR target/93372 * gcc.target/cris/pr93372-47.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/reorg.c37
-rw-r--r--gcc/testsuite/gcc.target/cris/pr93372-47.c49
2 files changed, 85 insertions, 1 deletions
diff --git a/gcc/reorg.c b/gcc/reorg.c
index 84beb93..613831e 100644
--- a/gcc/reorg.c
+++ b/gcc/reorg.c
@@ -2412,6 +2412,21 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
CLEAR_RESOURCE (&needed);
CLEAR_RESOURCE (&set);
+ /* Handle the flags register specially, to be able to accept a
+ candidate that clobbers it. See also fill_simple_delay_slots. */
+ bool filter_flags
+ = (slots_to_fill == 1
+ && targetm.flags_regnum != INVALID_REGNUM
+ && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
+ struct resources fset;
+ struct resources flags_res;
+ if (filter_flags)
+ {
+ CLEAR_RESOURCE (&fset);
+ CLEAR_RESOURCE (&flags_res);
+ SET_HARD_REG_BIT (flags_res.regs, targetm.flags_regnum);
+ }
+
/* If we do not own this thread, we must stop as soon as we find
something that we can't put in a delay slot, since all we can do
is branch into THREAD at a later point. Therefore, labels stop
@@ -2440,8 +2455,18 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
/* If TRIAL conflicts with the insns ahead of it, we lose. Also,
don't separate or copy insns that set and use CC0. */
if (! insn_references_resource_p (trial, &set, true)
- && ! insn_sets_resource_p (trial, &set, true)
+ && ! insn_sets_resource_p (trial, filter_flags ? &fset : &set, true)
&& ! insn_sets_resource_p (trial, &needed, true)
+ /* If we're handling sets to the flags register specially, we
+ only allow an insn into a delay-slot, if it either:
+ - doesn't set the flags register,
+ - the "set" of the flags register isn't used (clobbered),
+ - insns between the delay-slot insn and the trial-insn
+ as accounted in "set", have not affected the flags register. */
+ && (! filter_flags
+ || ! insn_sets_resource_p (trial, &flags_res, true)
+ || find_regno_note (trial, REG_UNUSED, targetm.flags_regnum)
+ || ! TEST_HARD_REG_BIT (set.regs, targetm.flags_regnum))
&& (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
&& (! own_thread || ! sets_cc0_p (pat)))))
&& ! can_throw_internal (trial))
@@ -2619,6 +2644,16 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
lose = 1;
mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
mark_referenced_resources (trial, &needed, true);
+ if (filter_flags)
+ {
+ mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
+
+ /* Groups of flags-register setters with users should not
+ affect opportunities to move flags-register-setting insns
+ (clobbers) into the delay-slot. */
+ CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
+ CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
+ }
/* Ensure we don't put insns between the setting of cc and the comparison
by moving a setting of cc into an earlier delay slot since these insns
diff --git a/gcc/testsuite/gcc.target/cris/pr93372-47.c b/gcc/testsuite/gcc.target/cris/pr93372-47.c
new file mode 100644
index 0000000..8d80ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/pr93372-47.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=v10" } */
+/* { dg-final { scan-assembler-times {\tnop} 1 } } */
+
+/* A somewhat brittle test-case, checking that we have (only) one
+ unfilled delay-slot in random_bitstring: there might be none or two
+ or more, and general improvements may lead to unfilled delay-slots.
+ When the scan-assembler-times directive regresses, re-run
+ gcc.c-torture/execute/arith-rand-ll.c, check cycle-level
+ execution-time regressions in random_bitstring and take appropriate
+ action. */
+
+static long long
+simple_rand ()  /* Advance the static seed and return it shifted right.  */
+{
+ static unsigned long long seed = 47114711;
+ unsigned long long this = seed * 1103515245 + 12345;  /* Next seed value.  */
+ seed = this;
+ return this >> 8;  /* Drop the low 8 bits of the new seed.  */
+}
+
+unsigned long long int
+random_bitstring ()  /* Build X from runs of bits sized by simple_rand.  */
+{
+ unsigned long long int x;
+ int n_bits;
+ long long ran;
+ int tot_bits = 0;  /* Running sum of every N_BITS drawn so far.  */
+
+ x = 0;
+ for (;;)
+ {
+ ran = simple_rand ();
+ n_bits = (ran >> 1) % 16;  /* Run length; bit 0 of RAN is tested below.  */
+ tot_bits += n_bits;
+
+ if (n_bits == 0)  /* A zero-length run ends the loop.  */
+ return x;
+ else
+ {
+ x <<= n_bits;
+ if (ran & 1)
+ x |= (1 << n_bits) - 1;  /* Set the N_BITS low bits just shifted in.  */
+
+ if (tot_bits > 8 * sizeof (long long) + 6)  /* Stop past this total.  */
+ return x;
+ }
+ }
+}