This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Avoid peeling in cunrolli


On Wed, 29 Nov 2017, Richard Biener wrote:

> 
> It turns out that we don't vectorize the 2nd testcase in PR83202
> (or rather we do that in weird ways during BB vectorization) because
> cunrolli decides to peel the inner loop completely based on
> the size of the accessed arrays.  That unfortunately leaves exit
> tests in the outer loop body which in turn makes us not vectorize
> the loop.
> 
> We have a late unrolling pass for this kind of unrolling, so this
> patch simply avoids doing it during cunrolli.
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

And this is what I applied after reviewing the testsuite regressions
caused by the first version of the patch.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2017-11-30  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83202
	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
	allow_peel argument and guard peeling.
	(canonicalize_loop_induction_variables): Likewise.
	(canonicalize_induction_variables): Pass false.
	(tree_unroll_loops_completely_1): Pass unroll_outer to disallow
	peeling from cunrolli.

	* gcc.dg/vect/pr83202-1.c: New testcase.
	* gcc.dg/tree-ssa/pr61743-1.c: Adjust.

Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c	(revision 255201)
+++ gcc/tree-ssa-loop-ivcanon.c	(working copy)
@@ -679,7 +679,7 @@ try_unroll_loop_completely (struct loop
 			    edge exit, tree niter,
 			    enum unroll_level ul,
 			    HOST_WIDE_INT maxiter,
-			    location_t locus)
+			    location_t locus, bool allow_peel)
 {
   unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
@@ -711,7 +711,8 @@ try_unroll_loop_completely (struct loop
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  if (maxiter >= 0
+  if ((allow_peel || maxiter == 0 || ul == UL_NO_GROWTH)
+      && maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
       n_unroll = maxiter;
@@ -1139,7 +1140,7 @@ try_peel_loop (struct loop *loop,
 static bool
 canonicalize_loop_induction_variables (struct loop *loop,
 				       bool create_iv, enum unroll_level ul,
-				       bool try_eval)
+				       bool try_eval, bool allow_peel)
 {
   edge exit = NULL;
   tree niter;
@@ -1207,7 +1208,8 @@ canonicalize_loop_induction_variables (s
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus,
+				  allow_peel))
     return true;
 
   if (create_iv
@@ -1238,7 +1240,7 @@ canonicalize_induction_variables (void)
     {
       changed |= canonicalize_loop_induction_variables (loop,
 							true, UL_SINGLE_ITER,
-							true);
+							true, false);
     }
   gcc_assert (!need_ssa_update_p (cfun));
 
@@ -1353,7 +1355,7 @@ tree_unroll_loops_completely_1 (bool may
     ul = UL_NO_GROWTH;
 
   if (canonicalize_loop_induction_variables
-        (loop, false, ul, !flag_tree_loop_ivcanon))
+        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
     {
       /* If we'll continue unrolling, we need to propagate constants
 	 within the new basic blocks to fold away induction variable
Index: gcc/testsuite/gcc.dg/vect/pr83202-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr83202-1.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr83202-1.c	(working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[8][8])
+{
+  for (int i = 0; i < 8; i++)
+    {
+      for (int j = 0; j < i; j+=4)
+	{
+	  data[i][j] *= data[i][j];
+	  data[i][j+1] *= data[i][j+1];
+	  data[i][j+2] *= data[i][j+2];
+	  data[i][j+3] *= data[i][j+3];
+	}
+    }
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "ectorized 1 loops" "vect" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c	(revision 255201)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c	(working copy)
@@ -48,5 +48,6 @@ int foo1 (e_u8 a[4][N], int b1, int b2,
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 8 "cunroll" } } */
-/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 2 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 2 "cunroll" } } */
+/* { dg-final { scan-tree-dump-not "completely unrolled" "cunrolli" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]