reorg.c (fill_slots_from_thread): Improve for TARGET_FLAGS_REGNUM targets

Hans-Peter Nilsson hp@axis.com
Fri Aug 14 05:27:53 GMT 2020


Originally I thought to bootstrap this patch on MIPS and SPARC
since they're both delayed-branch-slot targets but I
reconsidered, as neither is a TARGET_FLAGS_REGNUM target.  It
seems only visium and CRIS has this feature set, and I see no
trace of visium in neither newlib nor the simulator next to
glibc.  So, I just tested cris-elf.

This handles TARGET_FLAGS_REGNUM clobbering insns as delay-slot
fillers using a method similar to that in commit 33c2207d3fda,
where care was taken for fill_simple_delay_slots to allow such
insns when scanning for delay-slot fillers *backwards* (before
the insn).

A TARGET_FLAGS_REGNUM target is typically a former cc0 target.
For cc0 targets, insns don't mention clobbering cc0, so the
clobbers are mentioned in the "resources" only as a special
entity and only for compare-insns and branches, where the cc0
value matters.

In contrast, with TARGET_FLAGS_REGNUM, most insns clobber it and
the register liveness detection in reorg.c / resource.c treats
that as a blocker (for other insns mentioning it, i.e. most)
when looking for delay-slot-filling candidates.  This means that
when comparing core and performance for a delay-slot cc0 target
before and after the de-cc0 conversion, the inability to fill a
delay slot after conversion manifests as a regression.  This was
one such case, for CRIS, with random_bitstring in
gcc.c-torture/execute/arith-rand-ll.c as well as the target
libgcc division function.

After this, all known performance regressions compared to cc0
are fixed.

Ok to commit?

gcc:
	PR target/93372
	* reorg.c (fill_slots_from_thread): Allow trial insns that clobber
	TARGET_FLAGS_REGNUM as delay-slot fillers.

gcc/testsuite:
	PR target/93372
	* gcc.target/cris/pr93372-47.c: New test.
---
 gcc/reorg.c                                | 37 +++++++++++++++++++++-
 gcc/testsuite/gcc.target/cris/pr93372-47.c | 49 ++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/cris/pr93372-47.c

diff --git a/gcc/reorg.c b/gcc/reorg.c
index dfd7494bf..83161caa0 100644
--- a/gcc/reorg.c
+++ b/gcc/reorg.c
@@ -2411,6 +2411,21 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
   CLEAR_RESOURCE (&needed);
   CLEAR_RESOURCE (&set);
 
+  /* Handle the flags register specially, to be able to accept a
+     candidate that clobbers it.  See also fill_simple_delay_slots.  */
+  bool filter_flags
+    = (slots_to_fill == 1
+       && targetm.flags_regnum != INVALID_REGNUM
+       && find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
+  struct resources fset;
+  struct resources flags_res;
+  if (filter_flags)
+    {
+      CLEAR_RESOURCE (&fset);
+      CLEAR_RESOURCE (&flags_res);
+      SET_HARD_REG_BIT (flags_res.regs, targetm.flags_regnum);
+    }
+
   /* If we do not own this thread, we must stop as soon as we find
      something that we can't put in a delay slot, since all we can do
      is branch into THREAD at a later point.  Therefore, labels stop
@@ -2439,8 +2454,18 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
       /* If TRIAL conflicts with the insns ahead of it, we lose.  Also,
 	 don't separate or copy insns that set and use CC0.  */
       if (! insn_references_resource_p (trial, &set, true)
-	  && ! insn_sets_resource_p (trial, &set, true)
+	  && ! insn_sets_resource_p (trial, filter_flags ? &fset : &set, true)
 	  && ! insn_sets_resource_p (trial, &needed, true)
+	  /* If we're handling sets to the flags register specially, we
+	     only allow an insn into a delay-slot, if it either:
+	     - doesn't set the flags register,
+	     - the "set" of the flags register isn't used (clobbered),
+	     - insns between the delay-slot insn and the trial-insn
+	     as accounted in "set", have not affected the flags register.  */
+	  && (! filter_flags
+	      || ! insn_sets_resource_p (trial, &flags_res, true)
+	      || find_regno_note (trial, REG_UNUSED, targetm.flags_regnum)
+	      || ! TEST_HARD_REG_BIT (set.regs, targetm.flags_regnum))
 	  && (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
 			      && (! own_thread || ! sets_cc0_p (pat)))))
 	  && ! can_throw_internal (trial))
@@ -2618,6 +2643,16 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
       lose = 1;
       mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
       mark_referenced_resources (trial, &needed, true);
+      if (filter_flags)
+	{
+	  mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
+
+	  /* Groups of flags-register setters with users should not
+	     affect opportunities to move flags-register-setting insns
+	     (clobbers) into the delay-slot.  */
+	  CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
+	  CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
+	}
 
       /* Ensure we don't put insns between the setting of cc and the comparison
 	 by moving a setting of cc into an earlier delay slot since these insns
diff --git a/gcc/testsuite/gcc.target/cris/pr93372-47.c b/gcc/testsuite/gcc.target/cris/pr93372-47.c
new file mode 100644
index 000000000..8d80ae6b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/pr93372-47.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=v10" } */
+/* { dg-final { scan-assembler-times {\tnop} 1 } } */
+
+/* A somewhat brittle test-case, checking that we have (only) one
+   unfilled delay-slot in random_bitstring: there might be none or two
+   or more, and general improvements may lead to unfilled delay-slots.
+   When the scan-assembler-times directive regresses, re-run
+   gcc.c-torture/execute/arith-rand-ll.c, check cycle-level
+   execution-time regressions in random_bitstring and take appropriate
+   action.  */
+
+static long long
+simple_rand ()
+{
+  static unsigned long long seed = 47114711;
+  unsigned long long this = seed * 1103515245 + 12345;
+  seed = this;
+  return this >> 8;
+}
+
+unsigned long long int
+random_bitstring ()
+{
+  unsigned long long int x;
+  int n_bits;
+  long long ran;
+  int tot_bits = 0;
+
+  x = 0;
+  for (;;)
+    {
+      ran = simple_rand ();
+      n_bits = (ran >> 1) % 16;
+      tot_bits += n_bits;
+
+      if (n_bits == 0)
+	return x;
+      else
+	{
+	  x <<= n_bits;
+	  if (ran & 1)
+	    x |= (1 << n_bits) - 1;
+
+	  if (tot_bits > 8 * sizeof (long long) + 6)
+	    return x;
+	}
+    }
+}
-- 
2.11.0



More information about the Gcc-patches mailing list