[PATCH] Fixing PR59006 and PR58921 by delaying loop invariant hoisting in vectorizer.

Richard Biener rguenther@suse.de
Mon Jan 13 13:38:00 GMT 2014


On Wed, 27 Nov 2013, Jakub Jelinek wrote:

> On Wed, Nov 27, 2013 at 10:53:56AM +0100, Richard Biener wrote:
> > Hmm.  I'm still thinking that we should handle this during the regular
> > transform step.
> 
> I wonder if it can't be done instead just in vectorizable_load,
> if LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo) and the load is
> invariant, just emit the (broadcasted) load not inside of the loop, but on
> the loop preheader edge.

So this implements this suggestion, XFAILing the no longer handled cases.
For example we get

  _94 = *b_8(D);
  vect_cst_.18_95 = {_94, _94, _94, _94};
  _99 = prolog_loop_adjusted_niters.9_132 * 4;
  vectp_a.22_98 = a_6(D) + _99;
  ivtmp.43_77 = (unsigned long) vectp_a.22_98;

  <bb 13>:
  # ivtmp.41_67 = PHI <ivtmp.41_70(3), 0(12)>
  # ivtmp.43_71 = PHI <ivtmp.43_69(3), ivtmp.43_77(12)>
  vect__10.19_97 = vect_cst_.18_95 + { 1, 1, 1, 1 };
  _76 = (void *) ivtmp.43_71;
  MEM[base: _76, offset: 0B] = vect__10.19_97;

...

instead of having hoisted *b_8 + 1 as scalar computation.  Not sure
why LIM doesn't hoist the vector variant later.

vect__10.19_97 = vect_cst_.18_95 + vect_cst_.20_96;
  invariant up to level 1, cost 1.

ah, the cost thing.  Should be "improved" to see that hoisting
reduces the number of live SSA names in the loop.

Eventually lower_vector_ssa could optimize vector to scalar
code again ... (ick).

Bootstrap / regtest running on x86_64.

Comments?

Thanks,
Richard.

2014-01-13  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/58921
	PR tree-optimization/59006
	* tree-vect-loop-manip.c (vect_loop_versioning): Remove code
	hoisting invariant stmts.
	* tree-vect-stmts.c (vectorizable_load): Insert the splat of
	invariant loads on the preheader edge if possible.

	* gcc.dg/torture/pr58921.c: New testcase.
	* gcc.dg/torture/pr59006.c: Likewise.
	* gcc.dg/vect/pr58508.c: XFAIL no longer handled cases.

Index: gcc/tree-vect-loop-manip.c
===================================================================
*** gcc/tree-vect-loop-manip.c	(revision 206576)
--- gcc/tree-vect-loop-manip.c	(working copy)
*************** vect_loop_versioning (loop_vec_info loop
*** 2435,2507 ****
  	}
      }
  
- 
-   /* Extract load statements on memrefs with zero-stride accesses.  */
- 
-   if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
-     {
-       /* In the loop body, we iterate each statement to check if it is a load.
- 	 Then we check the DR_STEP of the data reference.  If DR_STEP is zero,
- 	 then we will hoist the load statement to the loop preheader.  */
- 
-       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-       int nbbs = loop->num_nodes;
- 
-       for (int i = 0; i < nbbs; ++i)
- 	{
- 	  for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
- 	       !gsi_end_p (si);)
- 	    {
- 	      gimple stmt = gsi_stmt (si);
- 	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- 	      struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- 
- 	      if (is_gimple_assign (stmt)
- 		  && (!dr
- 		      || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))))
- 		{
- 		  bool hoist = true;
- 		  ssa_op_iter iter;
- 		  tree var;
- 
- 		  /* We hoist a statement if all SSA uses in it are defined
- 		     outside of the loop.  */
- 		  FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
- 		    {
- 		      gimple def = SSA_NAME_DEF_STMT (var);
- 		      if (!gimple_nop_p (def)
- 			  && flow_bb_inside_loop_p (loop, gimple_bb (def)))
- 			{
- 			  hoist = false;
- 			  break;
- 			}
- 		    }
- 
- 		  if (hoist)
- 		    {
- 		      if (dr)
- 			gimple_set_vuse (stmt, NULL);
- 
- 		      gsi_remove (&si, false);
- 		      gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
- 						    stmt);
- 
- 		      if (dump_enabled_p ())
- 			{
- 			  dump_printf_loc
- 			      (MSG_NOTE, vect_location,
- 			       "hoisting out of the vectorized loop: ");
- 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
- 			  dump_printf (MSG_NOTE, "\n");
- 			}
- 		      continue;
- 		    }
- 		}
- 	      gsi_next (&si);
- 	    }
- 	}
-     }
- 
    /* End loop-exit-fixes after versioning.  */
  
    if (cond_expr_stmt_list)
--- 2435,2440 ----
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 206576)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 6368,6378 ****
  	      /* 4. Handle invariant-load.  */
  	      if (inv_p && !bb_vinfo)
  		{
- 		  gimple_stmt_iterator gsi2 = *gsi;
  		  gcc_assert (!grouped_load);
! 		  gsi_next (&gsi2);
! 		  new_temp = vect_init_vector (stmt, scalar_dest,
! 					       vectype, &gsi2);
  		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
  		}
  
--- 6368,6402 ----
  	      /* 4. Handle invariant-load.  */
  	      if (inv_p && !bb_vinfo)
  		{
  		  gcc_assert (!grouped_load);
! 		  /* If we have versioned for aliasing then we are sure
! 		     this is a loop invariant load and thus we can insert
! 		     it on the preheader edge.  */
! 		  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
! 		    {
! 		      if (dump_enabled_p ())
! 			{
! 			  dump_printf_loc (MSG_NOTE, vect_location,
! 					   "hoisting out of the vectorized "
! 					   "loop: ");
! 			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
! 			  dump_printf (MSG_NOTE, "\n");
! 			}
! 		      tree tem = copy_ssa_name (scalar_dest, NULL);
! 		      gsi_insert_on_edge_immediate
! 			(loop_preheader_edge (loop),
! 			 gimple_build_assign (tem,
! 					      unshare_expr
! 					        (gimple_assign_rhs1 (stmt))));
! 		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
! 		    }
! 		  else
! 		    {
! 		      gimple_stmt_iterator gsi2 = *gsi;
! 		      gsi_next (&gsi2);
! 		      new_temp = vect_init_vector (stmt, scalar_dest,
! 						   vectype, &gsi2);
! 		    }
  		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
  		}
  
Index: gcc/testsuite/gcc.dg/torture/pr58921.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr58921.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr58921.c	(working copy)
***************
*** 0 ****
--- 1,17 ----
+ /* { dg-do compile } */
+ 
+ int a[7];
+ int b;
+ 
+ void
+ fn1 ()
+ {
+   for (; b; b++)
+     a[b] = ((a[b] <= 0) == (a[0] != 0));
+ }
+ 
+ int
+ main ()
+ {
+   return 0;
+ }
Index: gcc/testsuite/gcc.dg/torture/pr59006.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr59006.c	(revision 0)
--- gcc/testsuite/gcc.dg/torture/pr59006.c	(working copy)
***************
*** 0 ****
--- 1,13 ----
+ /* { dg-do compile } */
+ 
+ int a[8], b;
+ void fn1(void)
+ {
+   int c;
+   for (; b; b++)
+     {
+       int d = a[b];
+       c = a[0] ? d : 0;
+       a[b] = c;
+     }
+ }
Index: gcc/testsuite/gcc.dg/vect/pr58508.c
===================================================================
*** gcc/testsuite/gcc.dg/vect/pr58508.c	(revision 206576)
--- gcc/testsuite/gcc.dg/vect/pr58508.c	(working copy)
*************** void test5 (int* a, int* b)
*** 66,70 ****
      }
  }
  
! /* { dg-final { scan-tree-dump-times "hoist" 8 "vect" { xfail vect_no_align } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */
--- 66,71 ----
      }
  }
  
! /* { dg-final { scan-tree-dump-times "hoist" 8 "vect" { xfail *-*-* } } } */
! /* { dg-final { scan-tree-dump-times "hoist" 3 "vect" { xfail vect_no_align } } } */
  /* { dg-final { cleanup-tree-dump "vect" } } */



More information about the Gcc-patches mailing list