This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH, GCC, AArch64] Fix PR88398 for AArch64


Hi

This patch is trying to fix PR88398 for AArch64. As discussed in the PR, 
loop unrolling is probably what we can do here. As an easy fix, the 
existing unroll_stupid is unrolling the given example better than the 
unroll_runtime_iterations since the the loop contains a break inside it.

So all I have done here is:
1) Add a target hook so that this is AArch64 specific.
2) We are not unrolling the loops that decide_unroll_runtime_iterations
would reject.
3) Out of the ones that decide_unroll_runtime_iterations would accept, 
check if the loop has more than 1 exit (this is done in the new target 
hook) and if it does, try to unroll using unroll_stupid.

Regression tested on AArch64 and added the test from the PR. This gives 
an overall code size reduction of 2.35% and performance gain of 0.498% 
on Spec2017 Intrate.

Is this ok for trunk?

Thanks
Sudi

gcc/ChangeLog:

2019-xx-xx  Sudakshina Das  <sudi.das@arm.com>

	PR88398
	* cfgloop.h: Include target.h.
	(lpt_dec): Move to...
	* target.h (lpt_dec): ... Here.
	* target.def: Define TARGET_LOOP_DECISION_ADJUST.
	* loop-unroll.c (decide_unroll_runtime_iterations): Use new target hook.
	(decide_unroll_stupid): Likewise.
	* config/aarch64/aarch64.c (aarch64_loop_decision_adjust): New function.
	(TARGET_LOOP_DECISION_ADJUST): Define for AArch64.
	* doc/tm.texi: Regenerated.
	* doc/tm.texi.in: Document TARGET_LOOP_DECISION_ADJUST.

gcc/testsuite/ChangeLog:

2019-xx-xx  Sudakshina Das  <sudi.das@arm.com>

	PR88398
	* gcc.target/aarch64/pr88398.c: New test.
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 0b0154ffd7bf031a005de993b101d9db6dd98c43..985c74e3b60728fc8c9d34b69634488cae3451cb 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -21,15 +21,7 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_CFGLOOP_H
 
 #include "cfgloopmanip.h"
-
-/* Structure to hold decision about unrolling/peeling.  */
-enum lpt_dec
-{
-  LPT_NONE,
-  LPT_UNROLL_CONSTANT,
-  LPT_UNROLL_RUNTIME,
-  LPT_UNROLL_STUPID
-};
+#include "target.h"
 
 struct GTY (()) lpt_decision {
   enum lpt_dec decision;
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 599d07a729e7438080f8b5240ee95037a49fb983..f31ac41d66257c01ead8d5f5b9b22379ecb5d276 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21093,6 +21093,39 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
     }
 }
 
+/* Implement TARGET_LOOP_DECISION_ADJUST.  CONSIDER is the loop decision
+   currently being checked for loop LOOP.  This returns a decision which could
+   either be LPT_UNROLL_STUPID or the current value in LOOP.  */
+static enum lpt_dec
+aarch64_loop_decision_adjust (enum lpt_dec consider, class loop *loop)
+{
+  switch (consider)
+    {
+    case LPT_UNROLL_CONSTANT:
+      return loop->lpt_decision.decision;
+
+    case LPT_UNROLL_RUNTIME:
+    /* Fall through.  */
+    case LPT_UNROLL_STUPID:
+      {
+	vec<edge> edges = get_loop_exit_edges (loop);
+	if (edges.length () > 1)
+	  {
+	    if (dump_file)
+	      fprintf (dump_file, ";; Need change in loop decision\n");
+	    consider = LPT_UNROLL_STUPID;
+	    return consider;
+	  }
+	return loop->lpt_decision.decision;
+      }
+
+    case LPT_NONE:
+    /* Fall through.  */
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Implement TARGET_COMPUTE_PRESSURE_CLASSES.  */
 
 static int
@@ -21839,6 +21872,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
 
+#undef TARGET_LOOP_DECISION_ADJUST
+#define TARGET_LOOP_DECISION_ADJUST aarch64_loop_decision_adjust
+
 #undef TARGET_SCHED_ADJUST_PRIORITY
 #define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index cd9aed9874f4e6b2b0e2f8956ed6155975e643a8..61bd00e84c8a2a8865e95ba579c3b94790ab1331 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11857,6 +11857,15 @@ is required only when the target has special constraints like maximum
 number of memory accesses.
 @end deftypefn
 
+@deftypefn {Target Hook} {enum lpt_dec} TARGET_LOOP_DECISION_ADJUST (enum lpt_dec @var{consider}, class loop *@var{loop})
+This target hook returns either a new value for the loop unrolling
+decision or the existing value in @var{loop}. The parameter @var{consider}
+is the loop decision currently being tested. The parameter @var{loop} is a
+pointer to the loop, which is going to be checked for unrolling. This target
+hook is required only when the target wants to override the unrolling
+decisions.
+@end deftypefn
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 2739e9ceec5ad7253ff9135da8dbe3bf6010e8d7..7a7f917fb45a6cc22f373ff16f8b78aa3e35f210 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8026,6 +8026,8 @@ build_type_attribute_variant (@var{mdecl},
 
 @hook TARGET_LOOP_UNROLL_ADJUST
 
+@hook TARGET_LOOP_DECISION_ADJUST
+
 @defmac POWI_MAX_MULTS
 If defined, this macro is interpreted as a signed integer C expression
 that specifies the maximum number of floating point multiplications
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 63fccd23fae38f8918a7d94411aaa43c72830dd3..d8f09d445764f74b8142ff3996cd1db229464202 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -672,6 +672,7 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
   unsigned nunroll, nunroll_by_av, i;
   class niter_desc *desc;
   widest_int iterations;
+  enum lpt_dec decision = LPT_UNROLL_RUNTIME;
 
   /* If we were not asked to unroll this loop, just return back silently.  */
   if (!(flags & UAP_UNROLL) && !loop->unroll)
@@ -735,6 +736,16 @@ decide_unroll_runtime_iterations (class loop *loop, int flags)
       return;
     }
 
+  if (targetm.loop_decision_adjust)
+    decision = targetm.loop_decision_adjust (decision, loop);
+
+  if (decision != loop->lpt_decision.decision)
+    {
+      if (dump_file)
+	fprintf (dump_file, ";; Not unrolling loop, there maybe a better decision\n");
+      return;
+    }
+
   /* Success; now force nunroll to be power of 2, as code-gen
      requires it, we are unable to cope with overflows in
      computation of number of iterations.  */
@@ -1157,9 +1168,16 @@ decide_unroll_stupid (class loop *loop, int flags)
   unsigned nunroll, nunroll_by_av, i;
   class niter_desc *desc;
   widest_int iterations;
+  enum lpt_dec decision = LPT_UNROLL_STUPID;
 
-  /* If we were not asked to unroll this loop, just return back silently.  */
-  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
+  if (targetm.loop_decision_adjust)
+    decision = targetm.loop_decision_adjust (decision, loop);
+
+  /* If we were not asked to unroll this loop and the target also does not
+     tell us otherwise, just return back silently.  */
+  if (!(flags & UAP_UNROLL_ALL)
+      && !loop->unroll
+      && decision == loop->lpt_decision.decision)
     return;
 
   if (dump_enabled_p ())
@@ -1189,26 +1207,41 @@ decide_unroll_stupid (class loop *loop, int flags)
       return;
     }
 
-  /* Check for simple loops.  */
   desc = get_simple_loop_desc (loop);
 
-  /* Check simpleness.  */
-  if (desc->simple_p && !desc->assumptions)
+  /* Skip/add a few checks if the target changed the decision.  */
+  if (decision == loop->lpt_decision.decision)
     {
-      if (dump_file)
-	fprintf (dump_file, ";; Loop is simple\n");
-      return;
-    }
+      /* Check simpleness.  */
+      if (desc->simple_p && !desc->assumptions)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; Loop is simple\n");
+	  return;
+	}
 
-  /* Do not unroll loops with branches inside -- it increases number
-     of mispredicts. 
-     TODO: this heuristic needs tunning; call inside the loop body
-     is also relatively good reason to not unroll.  */
-  if (num_loop_branches (loop) > 1)
+      /* Do not unroll loops with branches inside -- it increases number
+	 of mispredicts.
+	 TODO: this heuristic needs tunning; call inside the loop body
+	 is also relatively good reason to not unroll.  */
+      if (num_loop_branches (loop) > 1)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; Not unrolling, contains branches\n");
+	  return;
+	}
+    }
+  else
     {
-      if (dump_file)
-	fprintf (dump_file, ";; Not unrolling, contains branches\n");
-      return;
+      /* For target effected loops, adding the simpleness check from
+	 decide_unroll_runtime_iterations since we do not want to add more
+	 unrolling than before.  */
+      if (!desc->simple_p && desc->assumptions)
+	{
+	  if (dump_file)
+	    fprintf (dump_file, ";; Not unrolling, loop is not simple\n");
+	  return;
+	}
     }
 
   /* Check whether the loop rolls.  */
diff --git a/gcc/target.def b/gcc/target.def
index 8e83c2c7a7136511c07a5bc9e18876c91a38b955..d869495f9c8a9b9187a802298e28aeb334a94cfb 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2680,6 +2680,18 @@ number of memory accesses.",
  unsigned, (unsigned nunroll, class loop *loop),
  NULL)
 
+/* Return a new value for loop unroll decision.  */
+DEFHOOK
+(loop_decision_adjust,
+ "This target hook returns either a new value for the loop unrolling\n\
+decision or the existing value in @var{loop}. The parameter @var{consider}\n\
+is the loop decision currently being tested. The parameter @var{loop} is a\n\
+pointer to the loop, which is going to be checked for unrolling. This target\n\
+hook is required only when the target wants to override the unrolling\n\
+decisions.",
+ enum lpt_dec, (enum lpt_dec consider, class loop *loop),
+ NULL)
+
 /* True if X is a legitimate MODE-mode immediate operand.  */
 DEFHOOK
 (legitimate_constant_p,
diff --git a/gcc/target.h b/gcc/target.h
index 843c3d7887f8b49edfe9e45abf9de760176896e4..f67084a449a145f3c8016df8b9230ae835eef363 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -140,6 +140,16 @@ struct ddg;
 /* This is defined in cfgloop.h .  */
 class loop;
 
+/* Structure to hold decision about unrolling/peeling.  This is used in
+   targetm.loop_unroll_adjust and in class loop.  */
+enum lpt_dec
+{
+  LPT_NONE,
+  LPT_UNROLL_CONSTANT,
+  LPT_UNROLL_RUNTIME,
+  LPT_UNROLL_STUPID
+};
+
 /* This is defined in ifcvt.h.  */
 struct noce_if_info;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr88398.c b/gcc/testsuite/gcc.target/aarch64/pr88398.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad1929a4e81df92972418ab5c7ed2a2e6db4f157
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr88398.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -funroll-loops -fdump-rtl-loop2_unroll-all" } */
+
+typedef unsigned char __uint8_t;
+typedef unsigned int __uint32_t;
+typedef unsigned long long __uint64_t;
+
+int
+foo (const __uint64_t len_limit, const __uint8_t *cur,
+             __uint32_t delta, int len) {
+
+  const __uint8_t *pb = cur - delta;
+
+  while (++len != len_limit) {
+    if (pb[len] != cur[len])
+      break;
+  }
+
+  return len;
+}
+
+/* { dg-final { scan-rtl-dump "considering unrolling loop stupidly" "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump "optimized: loop unrolled 7 times" "loop2_unroll" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]